[llvm] [GISel][RISCV]Implement indirect parameter passing for large scalars (PR #95429)
Gábor Spaits via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 14 15:04:30 PDT 2024
https://github.com/spaits updated https://github.com/llvm/llvm-project/pull/95429
>From 3db3254691292a7793877d8bcc28351060598ca9 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 15:50:34 +0200
Subject: [PATCH 01/15] [GISel][RISCV]Implement indirect parameter passing for
large scalars
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 61 +++++--
.../Target/RISCV/GISel/RISCVCallLowering.cpp | 15 +-
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 167 ++++++++++++++++++
.../calling-conv-lp64-lp64f-lp64d-common.ll | 79 +++++++++
4 files changed, 302 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 412cd0a21ad41..daa5e0c07e9b7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -751,6 +751,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
const EVT OrigVT = EVT::getEVT(Args[i].Ty);
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
+ const LLT PointerTy = LLT::pointer(0, DL.getPointerSizeInBits(0));
// Expected to be multiple regs for a single incoming arg.
// There should be Regs.size() ArgLocs per argument.
@@ -764,19 +765,28 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// If we can't directly assign the register, we need one or more
// intermediate values.
Args[i].Regs.resize(NumParts);
-
- // For each split register, create and assign a vreg that will store
- // the incoming component of the larger value. These will later be
- // merged to form the final vreg.
- for (unsigned Part = 0; Part < NumParts; ++Part)
- Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+
+ // When we have indirect parameter passing we are receiving a pointer,
+ // that points to the actual value. In that case we need a pointer.
+ if (VA.getLocInfo() == CCValAssign::Indirect &&
+ Args[i].Flags[0].isSplit()) {
+ if (Handler.isIncomingArgumentHandler())
+ Args[i].Regs[0] = MRI.createGenericVirtualRegister(PointerTy);
+ } else {
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part)
+ Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+ }
}
assert((j + (NumParts - 1)) < ArgLocs.size() &&
"Too many regs for number of args");
// Coerce into outgoing value types before register assignment.
- if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) {
+ if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
+ VA.getLocInfo() != CCValAssign::Indirect) {
assert(Args[i].OrigRegs.size() == 1);
buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
@@ -790,6 +800,28 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
CCValAssign &VA = ArgLocs[j + Idx];
const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
+ // We found an indirect parameter passing and we are at the first part of
+ // the value being passed. In this case copy the incoming pointer into a
+ // virtual register so later we can load it.
+ if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
+ if (Handler.isIncomingArgumentHandler())
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ else {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
+ Align(8), false);
+
+ auto PointerToStackReg =
+ MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
+ ->getOperand(0)
+ .getReg();
+ Handler.assignValueToAddress(Args[i].OrigRegs[Part], PointerToStackReg,
+ OrigTy, MachinePointerInfo{}, VA);
+ Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
+ }
+ break;
+ }
+
if (VA.isMemLoc() && !Flags.isByVal()) {
// Individual pieces may have been spilled to the stack and others
// passed in registers.
@@ -866,11 +898,16 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
}
- // Now that all pieces have been assigned, re-pack the register typed values
- // into the original value typed registers.
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
- // Merge the split registers into the expected larger result vregs of
- // the original call.
+ // In case of indirect parameter passing load the value referred to by
+ // the argument.
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
+ VA.getLocInfo() == CCValAssign::Indirect) {
+ Handler.assignValueToAddress(Args[i].OrigRegs[0], Args[i].Regs[0], OrigTy,
+ MachinePointerInfo{}, VA);
+
+ } else if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+ // Now that all pieces have been assigned, re-pack the register typed values
+ // into the original value typed registers.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
LocTy, Args[i].Flags[0]);
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 2bfee45852b20..b1f381f4b30ad 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -89,12 +89,13 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
const MachinePointerInfo &MPO,
const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
- uint64_t LocMemOffset = VA.getLocMemOffset();
-
+ uint64_t Offset = 0;
+ if (VA.isMemLoc())
+ Offset = VA.getLocMemOffset();
+
// TODO: Move StackAlignment to subtarget and share with FrameLowering.
- auto MMO =
- MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
- commonAlignment(Align(16), LocMemOffset));
+ auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
+ commonAlignment(Align(16), Offset));
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
@@ -341,10 +342,8 @@ static bool isLegalElementTypeForRVV(Type *EltTy,
// TODO: Remove IsLowerArgs argument by adding support for vectors in lowerCall.
static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget,
bool IsLowerArgs = false) {
- // TODO: Integers larger than 2*XLen are passed indirectly which is not
- // supported yet.
if (T->isIntegerTy())
- return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
+ return true;
if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 1a3489521af19..92f5f6220f096 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -16,6 +16,173 @@
; Check that on RV32, i64 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_128i_in_regs(i128 %x, i128 %y ) {
+ ; RV32I-LABEL: name: callee_128i_in_regs
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i128 %x to i64
+ ret i64 %2
+}
+
+define i32 @caller_128i_in_regs( ) {
+ ; ILP32-LABEL: name: caller_128i_in_regs
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_128i_in_regs
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_128i_in_regs
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_128i_in_regs(i128 1, i128 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
+define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
+
+ ; RV32I-LABEL: name: callee_256i_in_regs
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i256 %x to i64
+ ret i64 %2
+}
+
+define i32 @caller_256i_in_regs( ) {
+ ; ILP32-LABEL: name: caller_256i_in_regs
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_256i_in_regs
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_256i_in_regs
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_256i_in_regs(i256 1, i256 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
define i32 @callee_i64_in_regs(i32 %a, i64 %b) nounwind {
; RV32I-LABEL: name: callee_i64_in_regs
; RV32I: bb.1 (%ir-block.0):
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index b175b8d92e6c9..0467bbe3d41bb 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -16,6 +16,85 @@
; Check that on RV64, i128 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
+
+ ; RV64I-LABEL: name: callee_256i_in_regs
+ ; RV64I: bb.1 (%ir-block.0):
+ ; RV64I-NEXT: liveins: $x10, $x11
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
+ ; RV64I-NEXT: $x10 = COPY [[TRUNC]](s64)
+ ; RV64I-NEXT: PseudoRET implicit $x10
+ %2 = trunc i256 %x to i64
+ ret i64 %2
+}
+
+define i32 @caller_256i_in_regs( ) {
+ ; LP64-LABEL: name: caller_256i_in_regs
+ ; LP64: bb.1 (%ir-block.0):
+ ; LP64-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; LP64-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; LP64-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; LP64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; LP64-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; LP64-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; LP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; LP64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64-NEXT: PseudoRET implicit $x10
+ ;
+ ; LP64F-LABEL: name: caller_256i_in_regs
+ ; LP64F: bb.1 (%ir-block.0):
+ ; LP64F-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; LP64F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; LP64F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; LP64F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; LP64F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; LP64F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; LP64F-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; LP64F-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64F-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64F-NEXT: PseudoRET implicit $x10
+ ;
+ ; LP64D-LABEL: name: caller_256i_in_regs
+ ; LP64D: bb.1 (%ir-block.0):
+ ; LP64D-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; LP64D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; LP64D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; LP64D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; LP64D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; LP64D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; LP64D-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; LP64D-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64D-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_256i_in_regs(i256 1, i256 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind {
; RV64I-LABEL: name: callee_i128_in_regs
; RV64I: bb.1 (%ir-block.0):
>From 7f0647c34bc342ff3fbf555fa658c973b6c7ffeb Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 19:58:49 +0200
Subject: [PATCH 02/15] Move code responsible for loading one level deeper
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 23 +++++++++-----------
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index daa5e0c07e9b7..dd3f6d29524c9 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -765,7 +765,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// If we can't directly assign the register, we need one or more
// intermediate values.
Args[i].Regs.resize(NumParts);
-
+
// When we have indirect parameter passing we are receiving a pointer,
// that points to the actual value. In that case we need a pointer.
if (VA.getLocInfo() == CCValAssign::Indirect &&
@@ -804,9 +804,12 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// the value being passed. In this case copy the incoming pointer into a
// virtual register so later we can load it.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
- if (Handler.isIncomingArgumentHandler())
+ if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- else {
+ Handler.assignValueToAddress(Args[i].OrigRegs[Part],
+ Args[i].Regs[Part], OrigTy,
+ MachinePointerInfo{}, VA);
+ } else {
MachineFrameInfo &MFI = MF.getFrameInfo();
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
Align(8), false);
@@ -815,8 +818,9 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
->getOperand(0)
.getReg();
- Handler.assignValueToAddress(Args[i].OrigRegs[Part], PointerToStackReg,
- OrigTy, MachinePointerInfo{}, VA);
+ Handler.assignValueToAddress(Args[i].OrigRegs[Part],
+ PointerToStackReg, OrigTy,
+ MachinePointerInfo{}, VA);
Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
}
break;
@@ -898,14 +902,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
}
- // In case of indirect parameter passing load the value referred to by
- // the argument.
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
- VA.getLocInfo() == CCValAssign::Indirect) {
- Handler.assignValueToAddress(Args[i].OrigRegs[0], Args[i].Regs[0], OrigTy,
- MachinePointerInfo{}, VA);
-
- } else if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
// Now that all pieces have been assigned, re-pack the register typed values
// into the original value typed registers.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
>From 0def404cba2915c05edb6d78c01d3e7838daec5f Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 20:49:08 +0200
Subject: [PATCH 03/15] Don't try to merge values when indirect parameter
passing has happened
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index dd3f6d29524c9..b4848383548d6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -791,7 +791,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
}
-
+ bool IndirectParameterPassingHandled = false;
bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
for (unsigned Part = 0; Part < NumParts; ++Part) {
Register ArgReg = Args[i].Regs[Part];
@@ -804,6 +804,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// the value being passed. In this case copy the incoming pointer into a
// virtual register so later we can load it.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
+ IndirectParameterPassingHandled = true;
if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
Handler.assignValueToAddress(Args[i].OrigRegs[Part],
@@ -902,9 +903,10 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
}
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
- // Now that all pieces have been assigned, re-pack the register typed values
- // into the original value typed registers.
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
+ !IndirectParameterPassingHandled) {
+ // Now that all pieces have been assigned, re-pack the register typed
+ // values into the original value typed registers.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
LocTy, Args[i].Flags[0]);
}
>From 28f66143c406956894b8868cef8eb6f47c965aa8 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 21:02:22 +0200
Subject: [PATCH 04/15] Use proper stack alignment
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index b4848383548d6..c1ad0f6f654e8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -811,9 +811,11 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Args[i].Regs[Part], OrigTy,
MachinePointerInfo{}, VA);
} else {
+ Align StackAlign =
+ MF.getSubtarget().getFrameLowering()->getStackAlign();
MachineFrameInfo &MFI = MF.getFrameInfo();
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
- Align(8), false);
+ StackAlign, false);
auto PointerToStackReg =
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
>From 6d22315eb70283b01331d53f63da2e3f72a983cf Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 21:07:03 +0200
Subject: [PATCH 05/15] Format
---
llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index b1f381f4b30ad..180e238a0a978 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -92,10 +92,10 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
uint64_t Offset = 0;
if (VA.isMemLoc())
Offset = VA.getLocMemOffset();
-
+
// TODO: Move StackAlignment to subtarget and share with FrameLowering.
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
- commonAlignment(Align(16), Offset));
+ commonAlignment(Align(16), Offset));
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
>From fc4c6e63d1e6b2d6c8222821574a6a73db7c2c2e Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 22:30:03 +0200
Subject: [PATCH 06/15] Use alloca address space
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index c1ad0f6f654e8..53da5c68ca32c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -751,7 +751,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
const EVT OrigVT = EVT::getEVT(Args[i].Ty);
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
- const LLT PointerTy = LLT::pointer(0, DL.getPointerSizeInBits(0));
+ const LLT PointerTy = LLT::pointer(MF.getDataLayout().getAllocaAddrSpace(),
+ DL.getPointerSizeInBits(0));
// Expected to be multiple regs for a single incoming arg.
// There should be Regs.size() ArgLocs per argument.
>From da2e5aed1abd255f75e093d22d5c2f2fb5c88973 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 22:35:45 +0200
Subject: [PATCH 07/15] Use the preferred OrigTy alignment
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 53da5c68ca32c..20952c4a4bef6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -812,8 +812,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Args[i].Regs[Part], OrigTy,
MachinePointerInfo{}, VA);
} else {
- Align StackAlign =
- MF.getSubtarget().getFrameLowering()->getStackAlign();
+ Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
MachineFrameInfo &MFI = MF.getFrameInfo();
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
StackAlign, false);
>From 9d9d01dd8aaae6f223b57135ae9e0d384e2e0d3e Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 22:39:26 +0200
Subject: [PATCH 08/15] Use alloca address space part2
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 20952c4a4bef6..5c59804775fae 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -743,6 +743,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
continue;
}
+ auto AllocaAddressSpace = MF.getDataLayout().getAllocaAddrSpace();
+
const MVT ValVT = VA.getValVT();
const MVT LocVT = VA.getLocVT();
@@ -751,8 +753,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
const EVT OrigVT = EVT::getEVT(Args[i].Ty);
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
- const LLT PointerTy = LLT::pointer(MF.getDataLayout().getAllocaAddrSpace(),
- DL.getPointerSizeInBits(0));
+ const LLT PointerTy = LLT::pointer(
+ AllocaAddressSpace, DL.getPointerSizeInBits(AllocaAddressSpace));
// Expected to be multiple regs for a single incoming arg.
// There should be Regs.size() ArgLocs per argument.
>From 4515c81681669c27761fd36005aa6014fb66975c Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 09:48:00 +0200
Subject: [PATCH 09/15] Simplify G_FRAME_INDEX creation
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 5c59804775fae..f9564b33672ce 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -819,10 +820,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
StackAlign, false);
- auto PointerToStackReg =
- MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
- ->getOperand(0)
- .getReg();
+ Register PointerToStackReg =
+ MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
Handler.assignValueToAddress(Args[i].OrigRegs[Part],
PointerToStackReg, OrigTy,
MachinePointerInfo{}, VA);
>From 9ae6859eff375ea0c0994a0e4cde49e4eb42640f Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 10:44:57 +0200
Subject: [PATCH 10/15] Do not dispatch loads and stores to Handler
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 16 ++++++----
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 32 +++++++++----------
.../calling-conv-lp64-lp64f-lp64d-common.ll | 4 +--
3 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index f9564b33672ce..243ad1e083780 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -811,9 +811,9 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
IndirectParameterPassingHandled = true;
if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- Handler.assignValueToAddress(Args[i].OrigRegs[Part],
- Args[i].Regs[Part], OrigTy,
- MachinePointerInfo{}, VA);
+ Align Alignment = DL.getABITypeAlign(Args[i].Ty);
+ MachinePointerInfo DstMPO;
+ MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], DstMPO, Alignment);
} else {
Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -822,9 +822,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Register PointerToStackReg =
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
- Handler.assignValueToAddress(Args[i].OrigRegs[Part],
- PointerToStackReg, OrigTy,
- MachinePointerInfo{}, VA);
+
+ MachinePointerInfo DstMPO;
+ Align DstAlign = std::max(Flags.getNonZeroOrigAlign(),
+ inferAlignFromPtrInfo(MF, DstMPO));
+ MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
+ DstMPO, DstAlign);
+
Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
}
break;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 92f5f6220f096..3803c044372fc 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -22,9 +22,9 @@ define i64 @callee_128i_in_regs(i128 %x, i128 %y ) {
; RV32I-NEXT: liveins: $x10, $x11
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
- ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 8)
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
- ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 8)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
@@ -41,10 +41,10 @@ define i32 @caller_128i_in_regs( ) {
; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -61,10 +61,10 @@ define i32 @caller_128i_in_regs( ) {
; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -81,10 +81,10 @@ define i32 @caller_128i_in_regs( ) {
; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -106,9 +106,9 @@ define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
; RV32I-NEXT: liveins: $x10, $x11
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
- ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 8)
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
- ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 8)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
@@ -125,10 +125,10 @@ define i32 @caller_256i_in_regs( ) {
; ILP32-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -145,10 +145,10 @@ define i32 @caller_256i_in_regs( ) {
; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -165,10 +165,10 @@ define i32 @caller_256i_in_regs( ) {
; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index 0467bbe3d41bb..caeb705039abf 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -23,9 +23,9 @@ define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
; RV64I-NEXT: liveins: $x10, $x11
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
- ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 16)
; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
- ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 16)
; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
; RV64I-NEXT: $x10 = COPY [[TRUNC]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
>From 32e991cc33166dbe909eabc2899321a53a76de00 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 12:55:33 +0200
Subject: [PATCH 11/15] Receive indirect args from the stack
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 24 +++++++-
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 56 +++++++++++++++++++
2 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 243ad1e083780..4eb4a2230aac3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "call-lowering"
@@ -810,7 +811,17 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
IndirectParameterPassingHandled = true;
if (Handler.isIncomingArgumentHandler()) {
- Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ Register PhysReg;
+ if (VA.isRegLoc()) {
+ PhysReg = VA.getLocReg();
+ } else if (VA.isMemLoc()) {
+ LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
+
+ MachinePointerInfo MPO;
+ PhysReg = Handler.getStackAddress(
+ MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
+ }
+ Handler.assignValueToReg(ArgReg, PhysReg, VA);
Align Alignment = DL.getABITypeAlign(Args[i].Ty);
MachinePointerInfo DstMPO;
MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], DstMPO, Alignment);
@@ -824,8 +835,15 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
MachinePointerInfo DstMPO;
- Align DstAlign = std::max(Flags.getNonZeroOrigAlign(),
- inferAlignFromPtrInfo(MF, DstMPO));
+ Align DstAlign{};
+ Align FlagAlignment{};
+ if (Flags.isByVal()) {
+ FlagAlignment = Flags.getNonZeroByValAlign();
+ } else {
+ FlagAlignment = Flags.getNonZeroOrigAlign();
+ }
+ DstAlign = std::max(FlagAlignment,
+ inferAlignFromPtrInfo(MF, DstMPO));
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
DstMPO, DstAlign);
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 3803c044372fc..8f2ec31ce4bc6 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -16,6 +16,62 @@
; Check that on RV32, i64 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i128 %y ) {
+ ; RV32I-LABEL: name: callee_128i_in_regs_stack
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $v23, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+ ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+ ; RV32I-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+ ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+ ; RV32I-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+ ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+ ; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.8
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.8, align 16)
+ ; RV32I-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.7)
+ ; RV32I-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
+ ; RV32I-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.6, align 8)
+ ; RV32I-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
+ ; RV32I-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.5)
+ ; RV32I-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
+ ; RV32I-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (load (s32) from %fixed-stack.4, align 16)
+ ; RV32I-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
+ ; RV32I-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (load (s32) from %fixed-stack.3)
+ ; RV32I-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+ ; RV32I-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (load (s32) from %fixed-stack.2, align 8)
+ ; RV32I-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; RV32I-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (load (s32) from %fixed-stack.1)
+ ; RV32I-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; RV32I-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX8]](p0)
+ ; RV32I-NEXT: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[COPY8]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD8]](s128)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i128 %y to i64
+ ret i64 %2
+}
+
+define i32 @caller_128i_in_regs_stack() {
+ %1 = call i64 @callee_128i_in_regs_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i128 42)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
+
define i64 @callee_128i_in_regs(i128 %x, i128 %y ) {
; RV32I-LABEL: name: callee_128i_in_regs
; RV32I: bb.1 (%ir-block.0):
>From c730b44e49e8c0e9366ccadfd30b7f4986cc7e22 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 14:02:50 +0200
Subject: [PATCH 12/15] Pass indirect args on the stack
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 31 +-
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 330 ++++++++++++++++++
2 files changed, 350 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 4eb4a2230aac3..266441fe2b0e0 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -810,17 +810,20 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// virtual register so later we can load it.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
IndirectParameterPassingHandled = true;
+ bool IsInStack = false;
+ Register PhysReg;
+ if (VA.isRegLoc()) {
+ PhysReg = VA.getLocReg();
+ } else if (VA.isMemLoc()) {
+ IsInStack = true;
+ LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
+ MachinePointerInfo MPO;
+ PhysReg = Handler.getStackAddress(
+ MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
+ }
+
if (Handler.isIncomingArgumentHandler()) {
- Register PhysReg;
- if (VA.isRegLoc()) {
- PhysReg = VA.getLocReg();
- } else if (VA.isMemLoc()) {
- LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
-
- MachinePointerInfo MPO;
- PhysReg = Handler.getStackAddress(
- MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
- }
+
Handler.assignValueToReg(ArgReg, PhysReg, VA);
Align Alignment = DL.getABITypeAlign(Args[i].Ty);
MachinePointerInfo DstMPO;
@@ -844,10 +847,16 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
DstAlign = std::max(FlagAlignment,
inferAlignFromPtrInfo(MF, DstMPO));
+
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
DstMPO, DstAlign);
- Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
+ if (!IsInStack) {
+ Handler.assignValueToReg(PointerToStackReg, PhysReg, VA);
+ } else {
+ MIRBuilder.buildStore(PointerToStackReg, PhysReg,
+ DstMPO, DstAlign);
+ }
}
break;
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 8f2ec31ce4bc6..c10921d1d8326 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -16,6 +16,136 @@
; Check that on RV32, i64 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_128i_in_regs_stack_fst(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i128 %y ) {
+ ; RV32I-LABEL: name: callee_128i_in_regs_stack_fst
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $v3, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+ ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+ ; RV32I-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+ ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+ ; RV32I-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+ ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+ ; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; RV32I-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY8]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i128 %y to i64
+ ret i64 %2
+}
+
+define i32 @caller_128i_in_regs_stack_fst( ) {
+ ; ILP32-LABEL: name: caller_128i_in_regs_stack_fst
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack_fst, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_128i_in_regs_stack_fst
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32F-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32F-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32F-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32F-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32F-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack_fst, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_128i_in_regs_stack_fst
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32D-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32D-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32D-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32D-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32D-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack_fst, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_128i_in_regs_stack_fst(i64 1,i64 1, i64 1, i64 1, i128 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i128 %y ) {
; RV32I-LABEL: name: callee_128i_in_regs_stack
; RV32I: bb.1 (%ir-block.0):
@@ -66,6 +196,206 @@ define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x
}
define i32 @caller_128i_in_regs_stack() {
+ ; ILP32-LABEL: name: caller_128i_in_regs_stack
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; ILP32-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; ILP32-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; ILP32-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; ILP32-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; ILP32-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; ILP32-NEXT: [[C8:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+ ; ILP32-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
+ ; ILP32-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
+ ; ILP32-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; ILP32-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; ILP32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; ILP32-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4)
+ ; ILP32-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+ ; ILP32-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; ILP32-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; ILP32-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 8)
+ ; ILP32-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; ILP32-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; ILP32-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12)
+ ; ILP32-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+ ; ILP32-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; ILP32-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; ILP32-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 16)
+ ; ILP32-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; ILP32-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; ILP32-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20)
+ ; ILP32-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+ ; ILP32-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; ILP32-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s32)
+ ; ILP32-NEXT: G_STORE [[UV14]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 8)
+ ; ILP32-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; ILP32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
+ ; ILP32-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
+ ; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_128i_in_regs_stack
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; ILP32F-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; ILP32F-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; ILP32F-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; ILP32F-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; ILP32F-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; ILP32F-NEXT: [[C8:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+ ; ILP32F-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
+ ; ILP32F-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
+ ; ILP32F-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32F-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32F-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; ILP32F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4)
+ ; ILP32F-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+ ; ILP32F-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; ILP32F-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 8)
+ ; ILP32F-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; ILP32F-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12)
+ ; ILP32F-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+ ; ILP32F-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; ILP32F-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 16)
+ ; ILP32F-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; ILP32F-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20)
+ ; ILP32F-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+ ; ILP32F-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; ILP32F-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV14]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 8)
+ ; ILP32F-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; ILP32F-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
+ ; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32F-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32F-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32F-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32F-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32F-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_128i_in_regs_stack
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; ILP32D-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; ILP32D-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; ILP32D-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; ILP32D-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; ILP32D-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; ILP32D-NEXT: [[C8:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+ ; ILP32D-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
+ ; ILP32D-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
+ ; ILP32D-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32D-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32D-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; ILP32D-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4)
+ ; ILP32D-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+ ; ILP32D-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; ILP32D-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 8)
+ ; ILP32D-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; ILP32D-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12)
+ ; ILP32D-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+ ; ILP32D-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; ILP32D-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 16)
+ ; ILP32D-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; ILP32D-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20)
+ ; ILP32D-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+ ; ILP32D-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; ILP32D-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV14]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 8)
+ ; ILP32D-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; ILP32D-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
+ ; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32D-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32D-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32D-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32D-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32D-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
%1 = call i64 @callee_128i_in_regs_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i128 42)
%2 = trunc i64 %1 to i32
ret i32 %2
>From fe772d8c4f948da04e90ac74020829760873bcf8 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 23:32:49 +0200
Subject: [PATCH 13/15] Fix live-in virtual registers and restructure code
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 132 +++++++++---------
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 52 +++----
2 files changed, 95 insertions(+), 89 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 266441fe2b0e0..86f11a1aa2d59 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -808,57 +808,51 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// We found an indirect parameter passing and we are at the first part of
// the value being passed. In this case copy the incoming pointer into a
// virtual register so later we can load it.
- if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
- IndirectParameterPassingHandled = true;
- bool IsInStack = false;
- Register PhysReg;
- if (VA.isRegLoc()) {
- PhysReg = VA.getLocReg();
- } else if (VA.isMemLoc()) {
- IsInStack = true;
- LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
- MachinePointerInfo MPO;
- PhysReg = Handler.getStackAddress(
- MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
- }
+ if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
+ !Handler.isIncomingArgumentHandler()) {
+ Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
+ StackAlign, false);
- if (Handler.isIncomingArgumentHandler()) {
-
- Handler.assignValueToReg(ArgReg, PhysReg, VA);
- Align Alignment = DL.getABITypeAlign(Args[i].Ty);
- MachinePointerInfo DstMPO;
- MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], DstMPO, Alignment);
+ Register PointerToStackReg =
+ MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
+
+ MachinePointerInfo DstMPO;
+
+ Align FlagAlignment{};
+ if (Flags.isByVal()) {
+ FlagAlignment = Flags.getNonZeroByValAlign();
} else {
- Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
- MachineFrameInfo &MFI = MF.getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
- StackAlign, false);
-
- Register PointerToStackReg =
- MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
-
- MachinePointerInfo DstMPO;
- Align DstAlign{};
- Align FlagAlignment{};
- if (Flags.isByVal()) {
- FlagAlignment = Flags.getNonZeroByValAlign();
- } else {
- FlagAlignment = Flags.getNonZeroOrigAlign();
- }
- DstAlign = std::max(FlagAlignment,
- inferAlignFromPtrInfo(MF, DstMPO));
-
- MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
- DstMPO, DstAlign);
-
- if (!IsInStack) {
- Handler.assignValueToReg(PointerToStackReg, PhysReg, VA);
- } else {
- MIRBuilder.buildStore(PointerToStackReg, PhysReg,
- DstMPO, DstAlign);
- }
+ FlagAlignment = Flags.getNonZeroOrigAlign();
}
- break;
+ Align DstAlign = std::max(FlagAlignment, inferAlignFromPtrInfo(MF, DstMPO));
+
+ MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
+ DstAlign);
+
+ // This value assign is needed here for the case, when the pointer is
+ // being put on the stack before a function call, since there is no
+ // other branch in the later coming code that would assign the pointer
+ // to the register passed to the callee.
+ //
+ // Let's suppose we are on a target, where there are only 32 bit
+ // physical registers. Like the riscv32 target and we want to pass a 128
+ // bit value. If we did not have this branch with the break we would end
+ // up with GMIR like this:
+ //
+ // %1:_(s128) = G_CONSTANT i128 1
+ // %4:_(p0) = G_FRAME_INDEX %stack.1
+ // G_STORE %2:_(s128), %4:_(p0) :: (store (s128), align 8)
+ // $x10 = COPY %1:_(s128)
+ //
+ // So the later code would try to copy the 128 bit value directly into
+ // the 32 bit register.
+ if (VA.isRegLoc()) {
+ Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
+ break;
+ }
+ IndirectParameterPassingHandled = true;
}
if (VA.isMemLoc() && !Flags.isByVal()) {
@@ -874,10 +868,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
- continue;
- }
-
- if (VA.isMemLoc() && Flags.isByVal()) {
+ } else if (VA.isMemLoc() && Flags.isByVal()) {
assert(Args[i].Regs.size() == 1 &&
"didn't expect split byval pointer");
@@ -916,25 +907,40 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
DstMPO, DstAlign, SrcMPO, SrcAlign,
MemSize, VA);
}
- continue;
- }
+ // QUESTION: How to keep this assert with the new if then else structured code?
+ //assert(!VA.needsCustom() && "custom loc should have been handled already");
- assert(!VA.needsCustom() && "custom loc should have been handled already");
-
- if (i == 0 && !ThisReturnRegs.empty() &&
+ } else if (i == 0 && !ThisReturnRegs.empty() &&
Handler.isIncomingArgumentHandler() &&
- isTypeIsValidForThisReturn(ValVT)) {
+ isTypeIsValidForThisReturn(ValVT) && VA.isRegLoc()) {
Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
- continue;
- }
-
- if (Handler.isIncomingArgumentHandler())
+ } else if (Handler.isIncomingArgumentHandler() && VA.isRegLoc()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- else {
+ } else if (VA.isRegLoc()) {
DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
});
}
+
+ // Finish the handling of indirect parameter passing when receiving
+ // the value.
+ if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
+ Handler.isIncomingArgumentHandler()) {
+ Align Alignment = DL.getABITypeAlign(Args[i].Ty);
+ MachinePointerInfo DstMPO;
+
+ // Since we are doing indirect parameter passing, we know that the value
+ // in the temporary register is not the value passed to the function,
+ // but rather a pointer to that value. Let's load that value into the
+ // virtual register where the parameter should go.
+ MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], DstMPO,
+ Alignment);
+
+ IndirectParameterPassingHandled = true;
+ }
+
+ if (IndirectParameterPassingHandled)
+ break;
}
if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index c10921d1d8326..fd37cf8b9f346 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -19,7 +19,7 @@
define i64 @callee_128i_in_regs_stack_fst(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i128 %y ) {
; RV32I-LABEL: name: callee_128i_in_regs_stack_fst
; RV32I: bb.1 (%ir-block.0):
- ; RV32I-NEXT: liveins: $v3, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
@@ -34,9 +34,9 @@ define i64 @callee_128i_in_regs_stack_fst(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i1
; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
- ; RV32I-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
- ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY8]](p0) :: (load (s128), align 8)
- ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[LOAD]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD1]](s128)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
@@ -55,12 +55,12 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -87,12 +87,12 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32F-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32F-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32F-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -119,12 +119,12 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32D-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32D-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32D-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -149,7 +149,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i128 %y ) {
; RV32I-LABEL: name: callee_128i_in_regs_stack
; RV32I: bb.1 (%ir-block.0):
- ; RV32I-NEXT: liveins: $v23, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
@@ -184,9 +184,9 @@ define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x
; RV32I-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (load (s32) from %fixed-stack.1)
; RV32I-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
; RV32I-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
- ; RV32I-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX8]](p0)
- ; RV32I-NEXT: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[COPY8]](p0) :: (load (s128), align 8)
- ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD8]](s128)
+ ; RV32I-NEXT: [[LOAD8:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX8]](p0) :: (load (s32) from %fixed-stack.0, align 16)
+ ; RV32I-NEXT: [[LOAD9:%[0-9]+]]:_(s128) = G_LOAD [[LOAD8]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD9]](s128)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
@@ -241,11 +241,11 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
; ILP32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
- ; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32-NEXT: G_STORE [[C8]](s128), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -308,11 +308,11 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32F-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
; ILP32F-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32F-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
- ; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32F-NEXT: G_STORE [[C8]](s128), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -375,11 +375,11 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32D-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
; ILP32D-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32D-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
- ; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32D-NEXT: G_STORE [[C8]](s128), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
>From 4fb5892f36ab943ff06838882ca76a6ebd952ca0 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 23:50:36 +0200
Subject: [PATCH 14/15] Format plus add some explanatory comments
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 36 ++++++++++++++------
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 86f11a1aa2d59..a602ba89757df 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -772,7 +772,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Args[i].Regs.resize(NumParts);
// When we have indirect parameter passing we are receiving a pointer,
- // that points to the actual value. In that case we need a pointer.
+ // that points to the actual value, so we need one "temporary" pointer.
if (VA.getLocInfo() == CCValAssign::Indirect &&
Args[i].Flags[0].isSplit()) {
if (Handler.isIncomingArgumentHandler())
@@ -796,6 +796,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
}
+
bool IndirectParameterPassingHandled = false;
bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
for (unsigned Part = 0; Part < NumParts; ++Part) {
@@ -805,9 +806,20 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
CCValAssign &VA = ArgLocs[j + Idx];
const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
- // We found an indirect parameter passing and we are at the first part of
- // the value being passed. In this case copy the incoming pointer into a
- // virtual register so later we can load it.
+ // We found an indirect parameter passing, and we have an
+ // OutgoingValueHandler as our handler (so we are at the call site or the
+ // return value). In this case, start the construction of the following
+ // GMIR, that is responsible for the preparation of indirect parameter
+ // passing:
+ //
+ // %1(indirectly passed type) = The value to pass
+ // %3(pointer) = G_FRAME_INDEX %stack.0
+ // G_STORE %1, %3 :: (store (s128), align 8)
+ //
+ // After this GMIR, the remaining part of the loop body will decide how
+ // to get the value to the caller and we break out of the loop.
+ // NOTE: In the case, when the the pointer pointing to the value is passed
+ // in a register there is an exception to this, that is detailed bellow.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
!Handler.isIncomingArgumentHandler()) {
Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
@@ -826,7 +838,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
} else {
FlagAlignment = Flags.getNonZeroOrigAlign();
}
- Align DstAlign = std::max(FlagAlignment, inferAlignFromPtrInfo(MF, DstMPO));
+ Align DstAlign =
+ std::max(FlagAlignment, inferAlignFromPtrInfo(MF, DstMPO));
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
DstAlign);
@@ -907,12 +920,14 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
DstMPO, DstAlign, SrcMPO, SrcAlign,
MemSize, VA);
}
- // QUESTION: How to keep this assert with the new if then else structured code?
- //assert(!VA.needsCustom() && "custom loc should have been handled already");
+ // QUESTION: How to keep this assert with the new if then else
+ // structured code?
+ // assert(!VA.needsCustom() && "custom loc should have been handled
+ // already");
} else if (i == 0 && !ThisReturnRegs.empty() &&
- Handler.isIncomingArgumentHandler() &&
- isTypeIsValidForThisReturn(ValVT) && VA.isRegLoc()) {
+ Handler.isIncomingArgumentHandler() &&
+ isTypeIsValidForThisReturn(ValVT) && VA.isRegLoc()) {
Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
} else if (Handler.isIncomingArgumentHandler() && VA.isRegLoc()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
@@ -923,7 +938,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
// Finish the handling of indirect parameter passing when receiving
- // the value.
+ // the value (we are in the called function or the caller when receiving
+ // the return value).
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
Handler.isIncomingArgumentHandler()) {
Align Alignment = DL.getABITypeAlign(Args[i].Ty);
>From c7792b3b6d3efa064d34cd6e327c9d9b8f2ff158 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sat, 15 Jun 2024 00:04:08 +0200
Subject: [PATCH 15/15] Format and modify a comment
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index a602ba89757df..18482ac7e9cec 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -815,7 +815,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// %1(indirectly passed type) = The value to pass
// %3(pointer) = G_FRAME_INDEX %stack.0
// G_STORE %1, %3 :: (store (s128), align 8)
- //
+ //
// After this GMIR, the remaining part of the loop body will decide how
// to get the value to the caller and we break out of the loop.
// NOTE: In the case, when the the pointer pointing to the value is passed
@@ -844,10 +844,10 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
DstAlign);
- // This value assign is needed here for the case, when the pointer is
- // being put on the stack before a function call, since there is no
- // other branch in the later coming code that would assign the pointer
- // to the register passed to the callee.
+ // This value assign is needed here for the case, when the pointer to
+ // stack is passed in a register since there is no other branch in the
+ // later coming code that would copy the pointer to stack to the
+ // register used in parameter passing.
//
// Let's suppose we are on a target, where there are only 32 bit
// physical registers. Like the riscv32 target and we want to pass a 128
More information about the llvm-commits
mailing list