[llvm] [GISel][RISCV] Implement indirect parameter passing (PR #95429)
Gábor Spaits via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 17 09:35:41 PDT 2024
https://github.com/spaits updated https://github.com/llvm/llvm-project/pull/95429
>From 3db3254691292a7793877d8bcc28351060598ca9 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 15:50:34 +0200
Subject: [PATCH 01/29] [GISel][RISCV] Implement indirect parameter passing for
large scalars
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 61 +++++--
.../Target/RISCV/GISel/RISCVCallLowering.cpp | 15 +-
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 167 ++++++++++++++++++
.../calling-conv-lp64-lp64f-lp64d-common.ll | 79 +++++++++
4 files changed, 302 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 412cd0a21ad41..daa5e0c07e9b7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -751,6 +751,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
const EVT OrigVT = EVT::getEVT(Args[i].Ty);
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
+ const LLT PointerTy = LLT::pointer(0, DL.getPointerSizeInBits(0));
// Expected to be multiple regs for a single incoming arg.
// There should be Regs.size() ArgLocs per argument.
@@ -764,19 +765,28 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// If we can't directly assign the register, we need one or more
// intermediate values.
Args[i].Regs.resize(NumParts);
-
- // For each split register, create and assign a vreg that will store
- // the incoming component of the larger value. These will later be
- // merged to form the final vreg.
- for (unsigned Part = 0; Part < NumParts; ++Part)
- Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+
+ // When we have indirect parameter passing we are receiving a pointer,
+ // that points to the actual value. In that case we need a pointer.
+ if (VA.getLocInfo() == CCValAssign::Indirect &&
+ Args[i].Flags[0].isSplit()) {
+ if (Handler.isIncomingArgumentHandler())
+ Args[i].Regs[0] = MRI.createGenericVirtualRegister(PointerTy);
+ } else {
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part)
+ Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+ }
}
assert((j + (NumParts - 1)) < ArgLocs.size() &&
"Too many regs for number of args");
// Coerce into outgoing value types before register assignment.
- if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) {
+ if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
+ VA.getLocInfo() != CCValAssign::Indirect) {
assert(Args[i].OrigRegs.size() == 1);
buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
@@ -790,6 +800,28 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
CCValAssign &VA = ArgLocs[j + Idx];
const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
+ // We found an indirect parameter passing and we are at the first part of
+ // the value being passed. In this case copy the incoming pointer into a
+ // virtual register so later we can load it.
+ if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
+ if (Handler.isIncomingArgumentHandler())
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ else {
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
+ Align(8), false);
+
+ auto PointerToStackReg =
+ MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
+ ->getOperand(0)
+ .getReg();
+ Handler.assignValueToAddress(Args[i].OrigRegs[Part], PointerToStackReg,
+ OrigTy, MachinePointerInfo{}, VA);
+ Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
+ }
+ break;
+ }
+
if (VA.isMemLoc() && !Flags.isByVal()) {
// Individual pieces may have been spilled to the stack and others
// passed in registers.
@@ -866,11 +898,16 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
}
- // Now that all pieces have been assigned, re-pack the register typed values
- // into the original value typed registers.
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
- // Merge the split registers into the expected larger result vregs of
- // the original call.
+ // In case of indirect parameter passing load the value referred to by
+ // the argument.
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
+ VA.getLocInfo() == CCValAssign::Indirect) {
+ Handler.assignValueToAddress(Args[i].OrigRegs[0], Args[i].Regs[0], OrigTy,
+ MachinePointerInfo{}, VA);
+
+ } else if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+ // Now that all pieces have been assigned, re-pack the register typed values
+ // into the original value typed registers.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
LocTy, Args[i].Flags[0]);
}
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 2bfee45852b20..b1f381f4b30ad 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -89,12 +89,13 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
const MachinePointerInfo &MPO,
const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
- uint64_t LocMemOffset = VA.getLocMemOffset();
-
+ uint64_t Offset = 0;
+ if (VA.isMemLoc())
+ Offset = VA.getLocMemOffset();
+
// TODO: Move StackAlignment to subtarget and share with FrameLowering.
- auto MMO =
- MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
- commonAlignment(Align(16), LocMemOffset));
+ auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
+ commonAlignment(Align(16), Offset));
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
@@ -341,10 +342,8 @@ static bool isLegalElementTypeForRVV(Type *EltTy,
// TODO: Remove IsLowerArgs argument by adding support for vectors in lowerCall.
static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget,
bool IsLowerArgs = false) {
- // TODO: Integers larger than 2*XLen are passed indirectly which is not
- // supported yet.
if (T->isIntegerTy())
- return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
+ return true;
if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 1a3489521af19..92f5f6220f096 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -16,6 +16,173 @@
; Check that on RV32, i64 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_128i_in_regs(i128 %x, i128 %y ) {
+ ; RV32I-LABEL: name: callee_128i_in_regs
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i128 %x to i64
+ ret i64 %2
+}
+
+define i32 @caller_128i_in_regs( ) {
+ ; ILP32-LABEL: name: caller_128i_in_regs
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_128i_in_regs
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_128i_in_regs
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_128i_in_regs(i128 1, i128 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
+define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
+
+ ; RV32I-LABEL: name: callee_256i_in_regs
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $x10, $x11
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i256 %x to i64
+ ret i64 %2
+}
+
+define i32 @caller_256i_in_regs( ) {
+ ; ILP32-LABEL: name: caller_256i_in_regs
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_256i_in_regs
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_256i_in_regs
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_256i_in_regs(i256 1, i256 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
define i32 @callee_i64_in_regs(i32 %a, i64 %b) nounwind {
; RV32I-LABEL: name: callee_i64_in_regs
; RV32I: bb.1 (%ir-block.0):
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index b175b8d92e6c9..0467bbe3d41bb 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -16,6 +16,85 @@
; Check that on RV64, i128 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
+
+ ; RV64I-LABEL: name: callee_256i_in_regs
+ ; RV64I: bb.1 (%ir-block.0):
+ ; RV64I-NEXT: liveins: $x10, $x11
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+ ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+ ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
+ ; RV64I-NEXT: $x10 = COPY [[TRUNC]](s64)
+ ; RV64I-NEXT: PseudoRET implicit $x10
+ %2 = trunc i256 %x to i64
+ ret i64 %2
+}
+
+define i32 @caller_256i_in_regs( ) {
+ ; LP64-LABEL: name: caller_256i_in_regs
+ ; LP64: bb.1 (%ir-block.0):
+ ; LP64-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; LP64-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; LP64-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; LP64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; LP64-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; LP64-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; LP64-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; LP64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64-NEXT: PseudoRET implicit $x10
+ ;
+ ; LP64F-LABEL: name: caller_256i_in_regs
+ ; LP64F: bb.1 (%ir-block.0):
+ ; LP64F-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; LP64F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; LP64F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; LP64F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; LP64F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; LP64F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; LP64F-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; LP64F-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64F-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64F-NEXT: PseudoRET implicit $x10
+ ;
+ ; LP64D-LABEL: name: caller_256i_in_regs
+ ; LP64D: bb.1 (%ir-block.0):
+ ; LP64D-NEXT: [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+ ; LP64D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+ ; LP64D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+ ; LP64D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
+ ; LP64D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+ ; LP64D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
+ ; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+ ; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+ ; LP64D-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+ ; LP64D-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64D-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_256i_in_regs(i256 1, i256 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind {
; RV64I-LABEL: name: callee_i128_in_regs
; RV64I: bb.1 (%ir-block.0):
>From 7f0647c34bc342ff3fbf555fa658c973b6c7ffeb Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 19:58:49 +0200
Subject: [PATCH 02/29] Move code responsible for loading one level deeper
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 23 +++++++++-----------
1 file changed, 10 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index daa5e0c07e9b7..dd3f6d29524c9 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -765,7 +765,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// If we can't directly assign the register, we need one or more
// intermediate values.
Args[i].Regs.resize(NumParts);
-
+
// When we have indirect parameter passing we are receiving a pointer,
// that points to the actual value. In that case we need a pointer.
if (VA.getLocInfo() == CCValAssign::Indirect &&
@@ -804,9 +804,12 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// the value being passed. In this case copy the incoming pointer into a
// virtual register so later we can load it.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
- if (Handler.isIncomingArgumentHandler())
+ if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- else {
+ Handler.assignValueToAddress(Args[i].OrigRegs[Part],
+ Args[i].Regs[Part], OrigTy,
+ MachinePointerInfo{}, VA);
+ } else {
MachineFrameInfo &MFI = MF.getFrameInfo();
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
Align(8), false);
@@ -815,8 +818,9 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
->getOperand(0)
.getReg();
- Handler.assignValueToAddress(Args[i].OrigRegs[Part], PointerToStackReg,
- OrigTy, MachinePointerInfo{}, VA);
+ Handler.assignValueToAddress(Args[i].OrigRegs[Part],
+ PointerToStackReg, OrigTy,
+ MachinePointerInfo{}, VA);
Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
}
break;
@@ -898,14 +902,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
}
- // In case of indirect parameter passing load the value referred to by
- // the argument.
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
- VA.getLocInfo() == CCValAssign::Indirect) {
- Handler.assignValueToAddress(Args[i].OrigRegs[0], Args[i].Regs[0], OrigTy,
- MachinePointerInfo{}, VA);
-
- } else if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
// Now that all pieces have been assigned, re-pack the register typed values
// into the original value typed registers.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
>From 0def404cba2915c05edb6d78c01d3e7838daec5f Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 20:49:08 +0200
Subject: [PATCH 03/29] Don't try to merge values when indirect parameter
passing has happened
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index dd3f6d29524c9..b4848383548d6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -791,7 +791,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
}
-
+ bool IndirectParameterPassingHandled = false;
bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
for (unsigned Part = 0; Part < NumParts; ++Part) {
Register ArgReg = Args[i].Regs[Part];
@@ -804,6 +804,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// the value being passed. In this case copy the incoming pointer into a
// virtual register so later we can load it.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
+ IndirectParameterPassingHandled = true;
if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
Handler.assignValueToAddress(Args[i].OrigRegs[Part],
@@ -902,9 +903,10 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
}
- if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
- // Now that all pieces have been assigned, re-pack the register typed values
- // into the original value typed registers.
+ if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
+ !IndirectParameterPassingHandled) {
+ // Now that all pieces have been assigned, re-pack the register typed
+ // values into the original value typed registers.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
LocTy, Args[i].Flags[0]);
}
>From 28f66143c406956894b8868cef8eb6f47c965aa8 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 21:02:22 +0200
Subject: [PATCH 04/29] Use proper stack alignment
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index b4848383548d6..c1ad0f6f654e8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -811,9 +811,11 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Args[i].Regs[Part], OrigTy,
MachinePointerInfo{}, VA);
} else {
+ Align StackAlign =
+ MF.getSubtarget().getFrameLowering()->getStackAlign();
MachineFrameInfo &MFI = MF.getFrameInfo();
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
- Align(8), false);
+ StackAlign, false);
auto PointerToStackReg =
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
>From 6d22315eb70283b01331d53f63da2e3f72a983cf Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 21:07:03 +0200
Subject: [PATCH 05/29] Format
---
llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index b1f381f4b30ad..180e238a0a978 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -92,10 +92,10 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
uint64_t Offset = 0;
if (VA.isMemLoc())
Offset = VA.getLocMemOffset();
-
+
// TODO: Move StackAlignment to subtarget and share with FrameLowering.
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
- commonAlignment(Align(16), Offset));
+ commonAlignment(Align(16), Offset));
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
>From fc4c6e63d1e6b2d6c8222821574a6a73db7c2c2e Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 22:30:03 +0200
Subject: [PATCH 06/29] Use alloca address space
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index c1ad0f6f654e8..53da5c68ca32c 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -751,7 +751,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
const EVT OrigVT = EVT::getEVT(Args[i].Ty);
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
- const LLT PointerTy = LLT::pointer(0, DL.getPointerSizeInBits(0));
+ const LLT PointerTy = LLT::pointer(MF.getDataLayout().getAllocaAddrSpace(),
+ DL.getPointerSizeInBits(0));
// Expected to be multiple regs for a single incoming arg.
// There should be Regs.size() ArgLocs per argument.
>From da2e5aed1abd255f75e093d22d5c2f2fb5c88973 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 22:35:45 +0200
Subject: [PATCH 07/29] Use the preferred OrigTy alignment
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 53da5c68ca32c..20952c4a4bef6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -812,8 +812,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Args[i].Regs[Part], OrigTy,
MachinePointerInfo{}, VA);
} else {
- Align StackAlign =
- MF.getSubtarget().getFrameLowering()->getStackAlign();
+ Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
MachineFrameInfo &MFI = MF.getFrameInfo();
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
StackAlign, false);
>From 9d9d01dd8aaae6f223b57135ae9e0d384e2e0d3e Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Thu, 13 Jun 2024 22:39:26 +0200
Subject: [PATCH 08/29] Use alloca address space part2
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 20952c4a4bef6..5c59804775fae 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -743,6 +743,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
continue;
}
+ auto AllocaAddressSpace = MF.getDataLayout().getAllocaAddrSpace();
+
const MVT ValVT = VA.getValVT();
const MVT LocVT = VA.getLocVT();
@@ -751,8 +753,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
const EVT OrigVT = EVT::getEVT(Args[i].Ty);
const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
- const LLT PointerTy = LLT::pointer(MF.getDataLayout().getAllocaAddrSpace(),
- DL.getPointerSizeInBits(0));
+ const LLT PointerTy = LLT::pointer(
+ AllocaAddressSpace, DL.getPointerSizeInBits(AllocaAddressSpace));
// Expected to be multiple regs for a single incoming arg.
// There should be Regs.size() ArgLocs per argument.
>From 4515c81681669c27761fd36005aa6014fb66975c Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 09:48:00 +0200
Subject: [PATCH 09/29] Simplify G_FRAME_INDEX creation
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 7 +++----
1 file changed, 3 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 5c59804775fae..f9564b33672ce 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -19,6 +19,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/IntrinsicInst.h"
@@ -819,10 +820,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
StackAlign, false);
- auto PointerToStackReg =
- MIRBuilder.buildFrameIndex(PointerTy, FrameIdx)
- ->getOperand(0)
- .getReg();
+ Register PointerToStackReg =
+ MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
Handler.assignValueToAddress(Args[i].OrigRegs[Part],
PointerToStackReg, OrigTy,
MachinePointerInfo{}, VA);
>From 9ae6859eff375ea0c0994a0e4cde49e4eb42640f Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 10:44:57 +0200
Subject: [PATCH 10/29] Do not dispatch loads and stores to Handler
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 16 ++++++----
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 32 +++++++++----------
.../calling-conv-lp64-lp64f-lp64d-common.ll | 4 +--
3 files changed, 28 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index f9564b33672ce..243ad1e083780 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -811,9 +811,9 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
IndirectParameterPassingHandled = true;
if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- Handler.assignValueToAddress(Args[i].OrigRegs[Part],
- Args[i].Regs[Part], OrigTy,
- MachinePointerInfo{}, VA);
+ Align Alignment = DL.getABITypeAlign(Args[i].Ty);
+ MachinePointerInfo DstMPO;
+ MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], DstMPO, Alignment);
} else {
Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -822,9 +822,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Register PointerToStackReg =
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
- Handler.assignValueToAddress(Args[i].OrigRegs[Part],
- PointerToStackReg, OrigTy,
- MachinePointerInfo{}, VA);
+
+ MachinePointerInfo DstMPO;
+ Align DstAlign = std::max(Flags.getNonZeroOrigAlign(),
+ inferAlignFromPtrInfo(MF, DstMPO));
+ MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
+ DstMPO, DstAlign);
+
Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
}
break;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 92f5f6220f096..3803c044372fc 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -22,9 +22,9 @@ define i64 @callee_128i_in_regs(i128 %x, i128 %y ) {
; RV32I-NEXT: liveins: $x10, $x11
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
- ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 8)
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
- ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 1)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 8)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
@@ -41,10 +41,10 @@ define i32 @caller_128i_in_regs( ) {
; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -61,10 +61,10 @@ define i32 @caller_128i_in_regs( ) {
; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -81,10 +81,10 @@ define i32 @caller_128i_in_regs( ) {
; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128))
+ ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128))
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -106,9 +106,9 @@ define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
; RV32I-NEXT: liveins: $x10, $x11
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
- ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 8)
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
- ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 8)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
@@ -125,10 +125,10 @@ define i32 @caller_256i_in_regs( ) {
; ILP32-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -145,10 +145,10 @@ define i32 @caller_256i_in_regs( ) {
; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -165,10 +165,10 @@ define i32 @caller_256i_in_regs( ) {
; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index 0467bbe3d41bb..caeb705039abf 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -23,9 +23,9 @@ define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
; RV64I-NEXT: liveins: $x10, $x11
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
- ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 16)
; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
- ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 1)
+ ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 16)
; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
; RV64I-NEXT: $x10 = COPY [[TRUNC]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
>From 32e991cc33166dbe909eabc2899321a53a76de00 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 12:55:33 +0200
Subject: [PATCH 11/29] Receive indirect args from the stack
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 24 +++++++-
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 56 +++++++++++++++++++
2 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 243ad1e083780..4eb4a2230aac3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -25,6 +25,7 @@
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/Support/Alignment.h"
#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "call-lowering"
@@ -810,7 +811,17 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
IndirectParameterPassingHandled = true;
if (Handler.isIncomingArgumentHandler()) {
- Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ Register PhysReg;
+ if (VA.isRegLoc()) {
+ PhysReg = VA.getLocReg();
+ } else if (VA.isMemLoc()) {
+ LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
+
+ MachinePointerInfo MPO;
+ PhysReg = Handler.getStackAddress(
+ MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
+ }
+ Handler.assignValueToReg(ArgReg, PhysReg, VA);
Align Alignment = DL.getABITypeAlign(Args[i].Ty);
MachinePointerInfo DstMPO;
MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], DstMPO, Alignment);
@@ -824,8 +835,15 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
MachinePointerInfo DstMPO;
- Align DstAlign = std::max(Flags.getNonZeroOrigAlign(),
- inferAlignFromPtrInfo(MF, DstMPO));
+ Align DstAlign{};
+ Align FlagAlignment{};
+ if (Flags.isByVal()) {
+ FlagAlignment = Flags.getNonZeroByValAlign();
+ } else {
+ FlagAlignment = Flags.getNonZeroOrigAlign();
+ }
+ DstAlign = std::max(FlagAlignment,
+ inferAlignFromPtrInfo(MF, DstMPO));
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
DstMPO, DstAlign);
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 3803c044372fc..8f2ec31ce4bc6 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -16,6 +16,62 @@
; Check that on RV32, i64 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i128 %y ) {
+ ; RV32I-LABEL: name: callee_128i_in_regs_stack
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $v23, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+ ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+ ; RV32I-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+ ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+ ; RV32I-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+ ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+ ; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.8
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.8, align 16)
+ ; RV32I-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.7)
+ ; RV32I-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
+ ; RV32I-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.6, align 8)
+ ; RV32I-NEXT: [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
+ ; RV32I-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.5)
+ ; RV32I-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
+ ; RV32I-NEXT: [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (load (s32) from %fixed-stack.4, align 16)
+ ; RV32I-NEXT: [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
+ ; RV32I-NEXT: [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (load (s32) from %fixed-stack.3)
+ ; RV32I-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+ ; RV32I-NEXT: [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (load (s32) from %fixed-stack.2, align 8)
+ ; RV32I-NEXT: [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+ ; RV32I-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (load (s32) from %fixed-stack.1)
+ ; RV32I-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; RV32I-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX8]](p0)
+ ; RV32I-NEXT: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[COPY8]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD8]](s128)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i128 %y to i64
+ ret i64 %2
+}
+
+define i32 @caller_128i_in_regs_stack() {
+ %1 = call i64 @callee_128i_in_regs_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i128 42)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
+
define i64 @callee_128i_in_regs(i128 %x, i128 %y ) {
; RV32I-LABEL: name: callee_128i_in_regs
; RV32I: bb.1 (%ir-block.0):
>From c730b44e49e8c0e9366ccadfd30b7f4986cc7e22 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 14:02:50 +0200
Subject: [PATCH 12/29] Pass indirect args on the stack
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 31 +-
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 330 ++++++++++++++++++
2 files changed, 350 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 4eb4a2230aac3..266441fe2b0e0 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -810,17 +810,20 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// virtual register so later we can load it.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
IndirectParameterPassingHandled = true;
+ bool IsInStack = false;
+ Register PhysReg;
+ if (VA.isRegLoc()) {
+ PhysReg = VA.getLocReg();
+ } else if (VA.isMemLoc()) {
+ IsInStack = true;
+ LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
+ MachinePointerInfo MPO;
+ PhysReg = Handler.getStackAddress(
+ MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
+ }
+
if (Handler.isIncomingArgumentHandler()) {
- Register PhysReg;
- if (VA.isRegLoc()) {
- PhysReg = VA.getLocReg();
- } else if (VA.isMemLoc()) {
- LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
-
- MachinePointerInfo MPO;
- PhysReg = Handler.getStackAddress(
- MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
- }
+
Handler.assignValueToReg(ArgReg, PhysReg, VA);
Align Alignment = DL.getABITypeAlign(Args[i].Ty);
MachinePointerInfo DstMPO;
@@ -844,10 +847,16 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
DstAlign = std::max(FlagAlignment,
inferAlignFromPtrInfo(MF, DstMPO));
+
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
DstMPO, DstAlign);
- Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
+ if (!IsInStack) {
+ Handler.assignValueToReg(PointerToStackReg, PhysReg, VA);
+ } else {
+ MIRBuilder.buildStore(PointerToStackReg, PhysReg,
+ DstMPO, DstAlign);
+ }
}
break;
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 8f2ec31ce4bc6..c10921d1d8326 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -16,6 +16,136 @@
; Check that on RV32, i64 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
+define i64 @callee_128i_in_regs_stack_fst(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i128 %y ) {
+ ; RV32I-LABEL: name: callee_128i_in_regs_stack_fst
+ ; RV32I: bb.1 (%ir-block.0):
+ ; RV32I-NEXT: liveins: $v3, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: {{ $}}
+ ; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+ ; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+ ; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+ ; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+ ; RV32I-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+ ; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+ ; RV32I-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+ ; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+ ; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; RV32I-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY8]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
+ ; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+ ; RV32I-NEXT: $x10 = COPY [[UV]](s32)
+ ; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
+ ; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
+ %2 = trunc i128 %y to i64
+ ret i64 %2
+}
+
+define i32 @caller_128i_in_regs_stack_fst( ) {
+ ; ILP32-LABEL: name: caller_128i_in_regs_stack_fst
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack_fst, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_128i_in_regs_stack_fst
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32F-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32F-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32F-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32F-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32F-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack_fst, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_128i_in_regs_stack_fst
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32D-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32D-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32D-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32D-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32D-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack_fst, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_128i_in_regs_stack_fst(i64 1,i64 1, i64 1, i64 1, i128 2)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i128 %y ) {
; RV32I-LABEL: name: callee_128i_in_regs_stack
; RV32I: bb.1 (%ir-block.0):
@@ -66,6 +196,206 @@ define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x
}
define i32 @caller_128i_in_regs_stack() {
+ ; ILP32-LABEL: name: caller_128i_in_regs_stack
+ ; ILP32: bb.1 (%ir-block.0):
+ ; ILP32-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; ILP32-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; ILP32-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; ILP32-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; ILP32-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; ILP32-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; ILP32-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; ILP32-NEXT: [[C8:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+ ; ILP32-NEXT: ADJCALLSTACKDOWN 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+ ; ILP32-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
+ ; ILP32-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
+ ; ILP32-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+ ; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; ILP32-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; ILP32-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; ILP32-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4)
+ ; ILP32-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+ ; ILP32-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; ILP32-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; ILP32-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 8)
+ ; ILP32-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; ILP32-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; ILP32-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12)
+ ; ILP32-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+ ; ILP32-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; ILP32-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; ILP32-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 16)
+ ; ILP32-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; ILP32-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; ILP32-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20)
+ ; ILP32-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+ ; ILP32-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; ILP32-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s32)
+ ; ILP32-NEXT: G_STORE [[UV14]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 8)
+ ; ILP32-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; ILP32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
+ ; ILP32-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
+ ; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32-NEXT: ADJCALLSTACKUP 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32F-LABEL: name: caller_128i_in_regs_stack
+ ; ILP32F: bb.1 (%ir-block.0):
+ ; ILP32F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; ILP32F-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; ILP32F-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; ILP32F-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; ILP32F-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; ILP32F-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; ILP32F-NEXT: [[C8:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+ ; ILP32F-NEXT: ADJCALLSTACKDOWN 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+ ; ILP32F-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
+ ; ILP32F-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
+ ; ILP32F-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+ ; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32F-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32F-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; ILP32F-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4)
+ ; ILP32F-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+ ; ILP32F-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; ILP32F-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 8)
+ ; ILP32F-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; ILP32F-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12)
+ ; ILP32F-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+ ; ILP32F-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; ILP32F-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 16)
+ ; ILP32F-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; ILP32F-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20)
+ ; ILP32F-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+ ; ILP32F-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; ILP32F-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV14]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 8)
+ ; ILP32F-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; ILP32F-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
+ ; ILP32F-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
+ ; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32F-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32F-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32F-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32F-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32F-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32F-NEXT: ADJCALLSTACKUP 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32F-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32F-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32F-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32F-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32F-NEXT: PseudoRET implicit $x10
+ ;
+ ; ILP32D-LABEL: name: caller_128i_in_regs_stack
+ ; ILP32D: bb.1 (%ir-block.0):
+ ; ILP32D-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; ILP32D-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; ILP32D-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; ILP32D-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; ILP32D-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; ILP32D-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; ILP32D-NEXT: [[C8:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+ ; ILP32D-NEXT: ADJCALLSTACKDOWN 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+ ; ILP32D-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
+ ; ILP32D-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
+ ; ILP32D-NEXT: [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+ ; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; ILP32D-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32D-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+ ; ILP32D-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV9]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4)
+ ; ILP32D-NEXT: [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+ ; ILP32D-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+ ; ILP32D-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV10]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 8)
+ ; ILP32D-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+ ; ILP32D-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV11]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12)
+ ; ILP32D-NEXT: [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+ ; ILP32D-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; ILP32D-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV12]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 16)
+ ; ILP32D-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+ ; ILP32D-NEXT: [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV13]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20)
+ ; ILP32D-NEXT: [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+ ; ILP32D-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+ ; ILP32D-NEXT: [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV14]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 8)
+ ; ILP32D-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+ ; ILP32D-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
+ ; ILP32D-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
+ ; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
+ ; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
+ ; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
+ ; ILP32D-NEXT: $x13 = COPY [[UV3]](s32)
+ ; ILP32D-NEXT: $x14 = COPY [[UV4]](s32)
+ ; ILP32D-NEXT: $x15 = COPY [[UV5]](s32)
+ ; ILP32D-NEXT: $x16 = COPY [[UV6]](s32)
+ ; ILP32D-NEXT: $x17 = COPY [[UV7]](s32)
+ ; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+ ; ILP32D-NEXT: ADJCALLSTACKUP 36, 0, implicit-def $x2, implicit $x2
+ ; ILP32D-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+ ; ILP32D-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+ ; ILP32D-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+ ; ILP32D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+ ; ILP32D-NEXT: $x10 = COPY [[TRUNC]](s32)
+ ; ILP32D-NEXT: PseudoRET implicit $x10
%1 = call i64 @callee_128i_in_regs_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i128 42)
%2 = trunc i64 %1 to i32
ret i32 %2
>From fe772d8c4f948da04e90ac74020829760873bcf8 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 23:32:49 +0200
Subject: [PATCH 13/29] Fix live-in virtual registers and restructure code
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 132 +++++++++---------
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 52 +++----
2 files changed, 95 insertions(+), 89 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 266441fe2b0e0..86f11a1aa2d59 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -808,57 +808,51 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// We found an indirect parameter passing and we are at the first part of
// the value being passed. In this case copy the incoming pointer into a
// virtual register so later we can load it.
- if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit()) {
- IndirectParameterPassingHandled = true;
- bool IsInStack = false;
- Register PhysReg;
- if (VA.isRegLoc()) {
- PhysReg = VA.getLocReg();
- } else if (VA.isMemLoc()) {
- IsInStack = true;
- LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
- MachinePointerInfo MPO;
- PhysReg = Handler.getStackAddress(
- MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
- }
+ if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
+ !Handler.isIncomingArgumentHandler()) {
+ Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
+ StackAlign, false);
- if (Handler.isIncomingArgumentHandler()) {
-
- Handler.assignValueToReg(ArgReg, PhysReg, VA);
- Align Alignment = DL.getABITypeAlign(Args[i].Ty);
- MachinePointerInfo DstMPO;
- MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], DstMPO, Alignment);
+ Register PointerToStackReg =
+ MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
+
+ MachinePointerInfo DstMPO;
+
+ Align FlagAlignment{};
+ if (Flags.isByVal()) {
+ FlagAlignment = Flags.getNonZeroByValAlign();
} else {
- Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
- MachineFrameInfo &MFI = MF.getFrameInfo();
- int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
- StackAlign, false);
-
- Register PointerToStackReg =
- MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
-
- MachinePointerInfo DstMPO;
- Align DstAlign{};
- Align FlagAlignment{};
- if (Flags.isByVal()) {
- FlagAlignment = Flags.getNonZeroByValAlign();
- } else {
- FlagAlignment = Flags.getNonZeroOrigAlign();
- }
- DstAlign = std::max(FlagAlignment,
- inferAlignFromPtrInfo(MF, DstMPO));
-
- MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg,
- DstMPO, DstAlign);
-
- if (!IsInStack) {
- Handler.assignValueToReg(PointerToStackReg, PhysReg, VA);
- } else {
- MIRBuilder.buildStore(PointerToStackReg, PhysReg,
- DstMPO, DstAlign);
- }
+ FlagAlignment = Flags.getNonZeroOrigAlign();
}
- break;
+ Align DstAlign = std::max(FlagAlignment, inferAlignFromPtrInfo(MF, DstMPO));
+
+ MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
+ DstAlign);
+
+ // This value assign is needed here for the case, when the pointer is
+ // being put on the stack before a function call, since there is no
+ // other branch in the later coming code that would assign the pointer
+ // to the register passed to the callee.
+ //
+ // Suppose we are on a target that has only 32-bit physical registers,
+ // such as riscv32, and we want to pass a 128-bit value. If we did not
+ // have this branch with the break, we would end up with GMIR like
+ // this:
+ //
+ // %1:_(s128) = G_CONSTANT i128 1
+ // %4:_(p0) = G_FRAME_INDEX %stack.1
+ // G_STORE %1:_(s128), %4:_(p0) :: (store (s128), align 8)
+ // $x10 = COPY %1:_(s128)
+ //
+ // So the later code would try to copy the 128 bit value directly into
+ // the 32 bit register.
+ if (VA.isRegLoc()) {
+ Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
+ break;
+ }
+ IndirectParameterPassingHandled = true;
}
if (VA.isMemLoc() && !Flags.isByVal()) {
@@ -874,10 +868,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
- continue;
- }
-
- if (VA.isMemLoc() && Flags.isByVal()) {
+ } else if (VA.isMemLoc() && Flags.isByVal()) {
assert(Args[i].Regs.size() == 1 &&
"didn't expect split byval pointer");
@@ -916,25 +907,40 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
DstMPO, DstAlign, SrcMPO, SrcAlign,
MemSize, VA);
}
- continue;
- }
+ // QUESTION: How to keep this assert with the new if then else structured code?
+ //assert(!VA.needsCustom() && "custom loc should have been handled already");
- assert(!VA.needsCustom() && "custom loc should have been handled already");
-
- if (i == 0 && !ThisReturnRegs.empty() &&
+ } else if (i == 0 && !ThisReturnRegs.empty() &&
Handler.isIncomingArgumentHandler() &&
- isTypeIsValidForThisReturn(ValVT)) {
+ isTypeIsValidForThisReturn(ValVT) && VA.isRegLoc()) {
Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
- continue;
- }
-
- if (Handler.isIncomingArgumentHandler())
+ } else if (Handler.isIncomingArgumentHandler() && VA.isRegLoc()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- else {
+ } else if (VA.isRegLoc()) {
DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
});
}
+
+ // Finish the handling of indirect parameter passing when receiving
+ // the value.
+ if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
+ Handler.isIncomingArgumentHandler()) {
+ Align Alignment = DL.getABITypeAlign(Args[i].Ty);
+ MachinePointerInfo DstMPO;
+
+ // Since we are doing indirect parameter passing, we know that the value
+ // in the temporary register is not the value passed to the function,
+ // but rather a pointer to that value. Let's load that value into the
+ // virtual register where the parameter should go.
+ MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], DstMPO,
+ Alignment);
+
+ IndirectParameterPassingHandled = true;
+ }
+
+ if (IndirectParameterPassingHandled)
+ break;
}
if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index c10921d1d8326..fd37cf8b9f346 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -19,7 +19,7 @@
define i64 @callee_128i_in_regs_stack_fst(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i128 %y ) {
; RV32I-LABEL: name: callee_128i_in_regs_stack_fst
; RV32I: bb.1 (%ir-block.0):
- ; RV32I-NEXT: liveins: $v3, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
@@ -34,9 +34,9 @@ define i64 @callee_128i_in_regs_stack_fst(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i1
; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
- ; RV32I-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX]](p0)
- ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY8]](p0) :: (load (s128), align 8)
- ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[LOAD]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD1]](s128)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
@@ -55,12 +55,12 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -87,12 +87,12 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32F-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32F-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32F-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -119,12 +119,12 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32D-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32D-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32D-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+ ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -149,7 +149,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i128 %y ) {
; RV32I-LABEL: name: callee_128i_in_regs_stack
; RV32I: bb.1 (%ir-block.0):
- ; RV32I-NEXT: liveins: $v23, $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
@@ -184,9 +184,9 @@ define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x
; RV32I-NEXT: [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (load (s32) from %fixed-stack.1)
; RV32I-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
; RV32I-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
- ; RV32I-NEXT: [[COPY8:%[0-9]+]]:_(p0) = COPY [[FRAME_INDEX8]](p0)
- ; RV32I-NEXT: [[LOAD8:%[0-9]+]]:_(s128) = G_LOAD [[COPY8]](p0) :: (load (s128), align 8)
- ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD8]](s128)
+ ; RV32I-NEXT: [[LOAD8:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX8]](p0) :: (load (s32) from %fixed-stack.0, align 16)
+ ; RV32I-NEXT: [[LOAD9:%[0-9]+]]:_(s128) = G_LOAD [[LOAD8]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD9]](s128)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
@@ -241,11 +241,11 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
; ILP32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
- ; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32-NEXT: G_STORE [[C8]](s128), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -308,11 +308,11 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32F-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
; ILP32F-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32F-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
- ; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32F-NEXT: G_STORE [[C8]](s128), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -375,11 +375,11 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32D-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
; ILP32D-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32D-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
- ; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
- ; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+ ; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+ ; ILP32D-NEXT: G_STORE [[C8]](s128), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
>From 4fb5892f36ab943ff06838882ca76a6ebd952ca0 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Fri, 14 Jun 2024 23:50:36 +0200
Subject: [PATCH 14/29] Format plus add some explanatory comments
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 36 ++++++++++++++------
1 file changed, 26 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 86f11a1aa2d59..a602ba89757df 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -772,7 +772,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Args[i].Regs.resize(NumParts);
// When we have indirect parameter passing we are receiving a pointer,
- // that points to the actual value. In that case we need a pointer.
+ // that points to the actual value, so we need one "temporary" pointer.
if (VA.getLocInfo() == CCValAssign::Indirect &&
Args[i].Flags[0].isSplit()) {
if (Handler.isIncomingArgumentHandler())
@@ -796,6 +796,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
ValTy, extendOpFromFlags(Args[i].Flags[0]));
}
+
bool IndirectParameterPassingHandled = false;
bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
for (unsigned Part = 0; Part < NumParts; ++Part) {
@@ -805,9 +806,20 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
CCValAssign &VA = ArgLocs[j + Idx];
const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
- // We found an indirect parameter passing and we are at the first part of
- // the value being passed. In this case copy the incoming pointer into a
- // virtual register so later we can load it.
+ // We found an indirect parameter passing, and we have an
+ // OutgoingValueHandler as our handler (so we are at the call site or the
+ // return value). In this case, start the construction of the following
+ // GMIR, that is responsible for the preparation of indirect parameter
+ // passing:
+ //
+ // %1(indirectly passed type) = The value to pass
+ // %3(pointer) = G_FRAME_INDEX %stack.0
+ // G_STORE %1, %3 :: (store (s128), align 8)
+ //
+ // After this GMIR, the remaining part of the loop body will decide how
+ // to get the value to the caller and we break out of the loop.
+ // NOTE: In the case, when the the pointer pointing to the value is passed
+ // in a register there is an exception to this, that is detailed below.
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
!Handler.isIncomingArgumentHandler()) {
Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
@@ -826,7 +838,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
} else {
FlagAlignment = Flags.getNonZeroOrigAlign();
}
- Align DstAlign = std::max(FlagAlignment, inferAlignFromPtrInfo(MF, DstMPO));
+ Align DstAlign =
+ std::max(FlagAlignment, inferAlignFromPtrInfo(MF, DstMPO));
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
DstAlign);
@@ -907,12 +920,14 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
DstMPO, DstAlign, SrcMPO, SrcAlign,
MemSize, VA);
}
- // QUESTION: How to keep this assert with the new if then else structured code?
- //assert(!VA.needsCustom() && "custom loc should have been handled already");
+ // QUESTION: How to keep this assert with the new if then else
+ // structured code?
+ // assert(!VA.needsCustom() && "custom loc should have been handled
+ // already");
} else if (i == 0 && !ThisReturnRegs.empty() &&
- Handler.isIncomingArgumentHandler() &&
- isTypeIsValidForThisReturn(ValVT) && VA.isRegLoc()) {
+ Handler.isIncomingArgumentHandler() &&
+ isTypeIsValidForThisReturn(ValVT) && VA.isRegLoc()) {
Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
} else if (Handler.isIncomingArgumentHandler() && VA.isRegLoc()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
@@ -923,7 +938,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
// Finish the handling of indirect parameter passing when receiving
- // the value.
+ // the value (we are in the called function or the caller when receiving
+ // the return value).
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
Handler.isIncomingArgumentHandler()) {
Align Alignment = DL.getABITypeAlign(Args[i].Ty);
>From c7792b3b6d3efa064d34cd6e327c9d9b8f2ff158 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sat, 15 Jun 2024 00:04:08 +0200
Subject: [PATCH 15/29] Format and modify a comment
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index a602ba89757df..18482ac7e9cec 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -815,7 +815,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// %1(indirectly passed type) = The value to pass
// %3(pointer) = G_FRAME_INDEX %stack.0
// G_STORE %1, %3 :: (store (s128), align 8)
- //
+ //
// After this GMIR, the remaining part of the loop body will decide how
// to get the value to the caller and we break out of the loop.
// NOTE: In the case, when the the pointer pointing to the value is passed
@@ -844,10 +844,10 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
DstAlign);
- // This value assign is needed here for the case, when the pointer is
- // being put on the stack before a function call, since there is no
- // other branch in the later coming code that would assign the pointer
- // to the register passed to the callee.
+ // This value assign is needed here for the case, when the pointer to
+ // stack is passed in a register since there is no other branch in the
+ // later coming code that would copy the pointer to stack to the
+ // register used in parameter passing.
//
// Let's suppose we are on a target, where there are only 32 bit
// physical registers. Like the riscv32 target and we want to pass a 128
>From ddc13570c6c5cf8fd49d17bf0cc6445b78e872e8 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sat, 15 Jun 2024 08:39:01 +0200
Subject: [PATCH 16/29] Fix bug with passing on stack: Storing value instead of
pointer
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 35 +++++++++----------
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 24 ++++++-------
.../calling-conv-lp64-lp64f-lp64d-common.ll | 6 ++--
3 files changed, 31 insertions(+), 34 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 18482ac7e9cec..c931f59851e0f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
@@ -844,25 +845,21 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
DstAlign);
- // This value assign is needed here for the case, when the pointer to
- // stack is passed in a register since there is no other branch in the
- // later coming code that would copy the pointer to stack to the
- // register used in parameter passing.
- //
- // Let's suppose we are on a target, where there are only 32 bit
- // physical registers. Like the riscv32 target and we want to pass a 128
- // bit value. If we did not have this branch with the break we would end
- // up with GMIR like this:
- //
- // %1:_(s128) = G_CONSTANT i128 1
- // %4:_(p0) = G_FRAME_INDEX %stack.1
- // G_STORE %2:_(s128), %4:_(p0) :: (store (s128), align 8)
- // $x10 = COPY %1:_(s128)
- //
- // So the later code would try to copy the 128 bit value directly into
- // the 32 bit register.
- if (VA.isRegLoc()) {
- Handler.assignValueToReg(PointerToStackReg, VA.getLocReg(), VA);
+ // If the value is not on the stack, then dispatch the process of
+ // moving it to the correct place for the call to the rest of the code.
+ if (!VA.isMemLoc()) {
+ ArgReg = PointerToStackReg;
+ }
+ // This value assign or load are needed here for the case, when the
+ // pointer to stack is passed, since there is no other case later that
+ // would handle this.
+ if (VA.isMemLoc()) {
+ LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
+ MachinePointerInfo MPO;
+ auto PassedStackAddress = Handler.getStackAddress(
+ MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
+ MIRBuilder.buildStore(PointerToStackReg, PassedStackAddress, DstMPO,
+ DstAlign);
break;
}
IndirectParameterPassingHandled = true;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index fd37cf8b9f346..8514d3d396bed 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -60,7 +60,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -92,7 +92,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32F-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -124,7 +124,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32D-NEXT: G_STORE [[C1]](s128), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -245,7 +245,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32-NEXT: G_STORE [[C8]](s128), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -312,7 +312,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32F-NEXT: G_STORE [[C8]](s128), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -379,7 +379,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32D-NEXT: G_STORE [[C8]](s128), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -428,9 +428,9 @@ define i32 @caller_128i_in_regs( ) {
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -448,9 +448,9 @@ define i32 @caller_128i_in_regs( ) {
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
+ ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -468,9 +468,9 @@ define i32 @caller_128i_in_regs( ) {
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
- ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
+ ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -512,9 +512,9 @@ define i32 @caller_256i_in_regs( ) {
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
- ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
+ ; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -532,9 +532,9 @@ define i32 @caller_256i_in_regs( ) {
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
- ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
+ ; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -552,9 +552,9 @@ define i32 @caller_256i_in_regs( ) {
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
- ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
+ ; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
; ILP32D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index caeb705039abf..af14451a79cf7 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -41,9 +41,9 @@ define i32 @caller_256i_in_regs( ) {
; LP64-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; LP64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; LP64-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
- ; LP64-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; LP64-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
; LP64-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; LP64-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
; LP64-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -60,9 +60,9 @@ define i32 @caller_256i_in_regs( ) {
; LP64F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; LP64F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; LP64F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
- ; LP64F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; LP64F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
; LP64F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; LP64F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
; LP64F-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
@@ -79,9 +79,9 @@ define i32 @caller_256i_in_regs( ) {
; LP64D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; LP64D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
; LP64D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
- ; LP64D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; LP64D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
; LP64D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; LP64D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
; LP64D-NEXT: ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
>From e17d14aac31aa86bf112ce2dc59ee34e55de89b9 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sat, 15 Jun 2024 09:29:21 +0200
Subject: [PATCH 17/29] Use MachinePointerInfos getFixedStack
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 3 +-
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 48 +++++++++----------
.../calling-conv-lp64-lp64f-lp64d-common.ll | 12 ++---
3 files changed, 32 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index c931f59851e0f..b64083b2745e3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -831,7 +831,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Register PointerToStackReg =
MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
- MachinePointerInfo DstMPO;
+ MachinePointerInfo DstMPO =
+ MachinePointerInfo::getFixedStack(MF, FrameIdx);
Align FlagAlignment{};
if (Flags.isByVal()) {
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 8514d3d396bed..dc06a75978317 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -56,11 +56,11 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 8)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -88,11 +88,11 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32F-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32F-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 8)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -120,11 +120,11 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32D-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32D-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 8)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -242,10 +242,10 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 8)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -309,10 +309,10 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32F-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32F-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 8)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -376,10 +376,10 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32D-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32D-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0), align 8)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 8)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -427,9 +427,9 @@ define i32 @caller_128i_in_regs( ) {
; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1, align 8)
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -447,9 +447,9 @@ define i32 @caller_128i_in_regs( ) {
; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1, align 8)
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -467,9 +467,9 @@ define i32 @caller_128i_in_regs( ) {
; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128), align 8)
+ ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128), align 8)
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1, align 8)
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -511,9 +511,9 @@ define i32 @caller_256i_in_regs( ) {
; ILP32-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
+ ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 8)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
+ ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 8)
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -531,9 +531,9 @@ define i32 @caller_256i_in_regs( ) {
; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
+ ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 8)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
+ ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 8)
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -551,9 +551,9 @@ define i32 @caller_256i_in_regs( ) {
; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 8)
+ ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 8)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 8)
+ ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 8)
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index af14451a79cf7..c1f2ab7553b34 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -40,9 +40,9 @@ define i32 @caller_256i_in_regs( ) {
; LP64-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; LP64-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; LP64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; LP64-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
; LP64-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; LP64-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
; LP64-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; LP64-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
@@ -59,9 +59,9 @@ define i32 @caller_256i_in_regs( ) {
; LP64F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; LP64F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; LP64F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; LP64F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
; LP64F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; LP64F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
; LP64F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; LP64F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
@@ -78,9 +78,9 @@ define i32 @caller_256i_in_regs( ) {
; LP64D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; LP64D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; LP64D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; LP64D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256), align 16)
+ ; LP64D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
; LP64D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; LP64D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256), align 16)
+ ; LP64D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
; LP64D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; LP64D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
>From 6787a2a63cc128f55233f771a3b8392dd174c633 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sun, 16 Jun 2024 17:48:59 +0200
Subject: [PATCH 18/29] Use stack alignment
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 11 ++---
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 48 +++++++++----------
2 files changed, 28 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index b64083b2745e3..7222d5c2bd5fb 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -28,6 +28,7 @@
#include "llvm/IR/Module.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Target/TargetMachine.h"
+#include <cassert>
#define DEBUG_TYPE "call-lowering"
@@ -51,6 +52,8 @@ addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
Flags.setNest();
if (AttrFn(Attribute::ByVal))
Flags.setByVal();
+ if (AttrFn(Attribute::ByRef))
+ Flags.setByRef();
if (AttrFn(Attribute::Preallocated))
Flags.setPreallocated();
if (AttrFn(Attribute::InAlloca))
@@ -834,14 +837,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MachinePointerInfo DstMPO =
MachinePointerInfo::getFixedStack(MF, FrameIdx);
- Align FlagAlignment{};
- if (Flags.isByVal()) {
- FlagAlignment = Flags.getNonZeroByValAlign();
- } else {
- FlagAlignment = Flags.getNonZeroOrigAlign();
- }
Align DstAlign =
- std::max(FlagAlignment, inferAlignFromPtrInfo(MF, DstMPO));
+ std::max(DL.getStackAlignment(), inferAlignFromPtrInfo(MF, DstMPO));
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
DstAlign);
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index dc06a75978317..8be3c837027f4 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -56,11 +56,11 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 8)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -88,11 +88,11 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32F-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32F-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 8)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -120,11 +120,11 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32D-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32D-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 8)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -242,10 +242,10 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
+ ; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 8)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -309,10 +309,10 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32F-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32F-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
+ ; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 8)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -376,10 +376,10 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32D-NEXT: [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
; ILP32D-NEXT: G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
+ ; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 8)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -427,9 +427,9 @@ define i32 @caller_128i_in_regs( ) {
; ILP32-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
+ ; ILP32-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1, align 8)
+ ; ILP32-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1)
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -447,9 +447,9 @@ define i32 @caller_128i_in_regs( ) {
; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
+ ; ILP32F-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1, align 8)
+ ; ILP32F-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1)
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -467,9 +467,9 @@ define i32 @caller_128i_in_regs( ) {
; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0, align 8)
+ ; ILP32D-NEXT: G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1, align 8)
+ ; ILP32D-NEXT: G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1)
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -511,9 +511,9 @@ define i32 @caller_256i_in_regs( ) {
; ILP32-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 8)
+ ; ILP32-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
; ILP32-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 8)
+ ; ILP32-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
; ILP32-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -531,9 +531,9 @@ define i32 @caller_256i_in_regs( ) {
; ILP32F-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32F-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 8)
+ ; ILP32F-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
; ILP32F-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 8)
+ ; ILP32F-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
; ILP32F-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32F-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
@@ -551,9 +551,9 @@ define i32 @caller_256i_in_regs( ) {
; ILP32D-NEXT: [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
; ILP32D-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; ILP32D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 8)
+ ; ILP32D-NEXT: G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
; ILP32D-NEXT: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
- ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 8)
+ ; ILP32D-NEXT: G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
; ILP32D-NEXT: $x10 = COPY [[FRAME_INDEX]](p0)
; ILP32D-NEXT: $x11 = COPY [[FRAME_INDEX1]](p0)
; ILP32D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
>From 01ac5b40758b51007a20ca8fdff19d32765b1ae8 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Sun, 16 Jun 2024 22:06:54 +0200
Subject: [PATCH 19/29] Draft: Use generic code to store pointer on stack
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 29 ++--
...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 12 +-
.../calling-conv-lp64-lp64f-lp64d-common.ll | 133 +++++++++++++++++-
3 files changed, 156 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 7222d5c2bd5fb..16faaa55be9ff 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -845,21 +845,21 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// If the value is not on the stack, then dispatch the process of
// moving it to the correct place for the call to the rest of the code.
- if (!VA.isMemLoc()) {
+ //if (!VA.isMemLoc()) {
ArgReg = PointerToStackReg;
- }
+ //}
// This value assign or load are needed here for the case, when the
// pointer to stack is passed, since there is no other case later that
// would handle this.
- if (VA.isMemLoc()) {
- LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
- MachinePointerInfo MPO;
- auto PassedStackAddress = Handler.getStackAddress(
- MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
- MIRBuilder.buildStore(PointerToStackReg, PassedStackAddress, DstMPO,
- DstAlign);
- break;
- }
+ //if (VA.isMemLoc()) {
+ // LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
+ // MachinePointerInfo MPO;
+ // auto PassedStackAddress = Handler.getStackAddress(
+ // MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
+ // MIRBuilder.buildStore(PointerToStackReg, PassedStackAddress, DstMPO,
+ // DstAlign);
+ // break;
+ //}
IndirectParameterPassingHandled = true;
}
@@ -875,6 +875,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
Register StackAddr = Handler.getStackAddress(
MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
+ // Finish the handling of indirect passing from the passers
+ // (OutgoingParameterHandler) side
+ if (IndirectParameterPassingHandled) {
+ Handler.assignValueToAddress(ArgReg, StackAddr, MemTy, MPO, VA);
+ break;
+ }
+
Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
} else if (VA.isMemLoc() && Flags.isByVal()) {
assert(Args[i].Regs.size() == 1 &&
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 8be3c837027f4..ec847e67eb533 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -60,7 +60,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 16)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -92,7 +92,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 16)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -124,7 +124,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into %stack.0, align 16)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -245,7 +245,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 16)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -312,7 +312,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 16)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -379,7 +379,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into %stack.0, align 16)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index c1f2ab7553b34..439417d90ddff 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -16,8 +16,139 @@
; Check that on RV64, i128 is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.
-define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
+define i32 @caller_256i_in_regs_stack( ) {
+ ; LP64-LABEL: name: caller_256i_in_regs_stack
+ ; LP64: bb.1 (%ir-block.0):
+ ; LP64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; LP64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; LP64-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; LP64-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; LP64-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; LP64-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; LP64-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; LP64-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; LP64-NEXT: [[C8:%[0-9]+]]:_(s256) = G_CONSTANT i256 42
+ ; LP64-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2
+ ; LP64-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64-NEXT: G_STORE [[C8]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+ ; LP64-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; LP64-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; LP64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+ ; LP64-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16)
+ ; LP64-NEXT: $x10 = COPY [[C]](s64)
+ ; LP64-NEXT: $x11 = COPY [[C1]](s64)
+ ; LP64-NEXT: $x12 = COPY [[C2]](s64)
+ ; LP64-NEXT: $x13 = COPY [[C3]](s64)
+ ; LP64-NEXT: $x14 = COPY [[C4]](s64)
+ ; LP64-NEXT: $x15 = COPY [[C5]](s64)
+ ; LP64-NEXT: $x16 = COPY [[C6]](s64)
+ ; LP64-NEXT: $x17 = COPY [[C7]](s64)
+ ; LP64-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs_stack, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10
+ ; LP64-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2
+ ; LP64-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; LP64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64-NEXT: PseudoRET implicit $x10
+ ;
+ ; LP64F-LABEL: name: caller_256i_in_regs_stack
+ ; LP64F: bb.1 (%ir-block.0):
+ ; LP64F-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; LP64F-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; LP64F-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; LP64F-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; LP64F-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; LP64F-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; LP64F-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; LP64F-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; LP64F-NEXT: [[C8:%[0-9]+]]:_(s256) = G_CONSTANT i256 42
+ ; LP64F-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2
+ ; LP64F-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64F-NEXT: G_STORE [[C8]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+ ; LP64F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; LP64F-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; LP64F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+ ; LP64F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16)
+ ; LP64F-NEXT: $x10 = COPY [[C]](s64)
+ ; LP64F-NEXT: $x11 = COPY [[C1]](s64)
+ ; LP64F-NEXT: $x12 = COPY [[C2]](s64)
+ ; LP64F-NEXT: $x13 = COPY [[C3]](s64)
+ ; LP64F-NEXT: $x14 = COPY [[C4]](s64)
+ ; LP64F-NEXT: $x15 = COPY [[C5]](s64)
+ ; LP64F-NEXT: $x16 = COPY [[C6]](s64)
+ ; LP64F-NEXT: $x17 = COPY [[C7]](s64)
+ ; LP64F-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs_stack, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10
+ ; LP64F-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2
+ ; LP64F-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64F-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; LP64F-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64F-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64F-NEXT: PseudoRET implicit $x10
+ ;
+ ; LP64D-LABEL: name: caller_256i_in_regs_stack
+ ; LP64D: bb.1 (%ir-block.0):
+ ; LP64D-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; LP64D-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; LP64D-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; LP64D-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; LP64D-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; LP64D-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; LP64D-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; LP64D-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+ ; LP64D-NEXT: [[C8:%[0-9]+]]:_(s256) = G_CONSTANT i256 42
+ ; LP64D-NEXT: ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2
+ ; LP64D-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+ ; LP64D-NEXT: G_STORE [[C8]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+ ; LP64D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+ ; LP64D-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; LP64D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+ ; LP64D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16)
+ ; LP64D-NEXT: $x10 = COPY [[C]](s64)
+ ; LP64D-NEXT: $x11 = COPY [[C1]](s64)
+ ; LP64D-NEXT: $x12 = COPY [[C2]](s64)
+ ; LP64D-NEXT: $x13 = COPY [[C3]](s64)
+ ; LP64D-NEXT: $x14 = COPY [[C4]](s64)
+ ; LP64D-NEXT: $x15 = COPY [[C5]](s64)
+ ; LP64D-NEXT: $x16 = COPY [[C6]](s64)
+ ; LP64D-NEXT: $x17 = COPY [[C7]](s64)
+ ; LP64D-NEXT: PseudoCALL target-flags(riscv-call) @callee_256i_in_regs_stack, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10
+ ; LP64D-NEXT: ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2
+ ; LP64D-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+ ; LP64D-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+ ; LP64D-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+ ; LP64D-NEXT: $x10 = COPY [[ANYEXT]](s64)
+ ; LP64D-NEXT: PseudoRET implicit $x10
+ %1 = call i64 @callee_256i_in_regs_stack(i64 1, i64 2, i64 3, i64 4,i64 5,i64 6,i64 7,i64 8, i256 42)
+ %2 = trunc i64 %1 to i32
+ ret i32 %2
+}
+
+
+define i64 @callee_256i_in_regs_stack(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i256 %y) {
+ ; RV64I-LABEL: name: callee_256i_in_regs_stack
+ ; RV64I: bb.1 (%ir-block.0):
+ ; RV64I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+ ; RV64I-NEXT: {{ $}}
+ ; RV64I-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+ ; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
+ ; RV64I-NEXT: [[COPY2:%[0-9]+]]:_(s64) = COPY $x12
+ ; RV64I-NEXT: [[COPY3:%[0-9]+]]:_(s64) = COPY $x13
+ ; RV64I-NEXT: [[COPY4:%[0-9]+]]:_(s64) = COPY $x14
+ ; RV64I-NEXT: [[COPY5:%[0-9]+]]:_(s64) = COPY $x15
+ ; RV64I-NEXT: [[COPY6:%[0-9]+]]:_(s64) = COPY $x16
+ ; RV64I-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $x17
+ ; RV64I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+ ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %fixed-stack.0, align 16)
+ ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[LOAD]](p0) :: (load (s256), align 16)
+ ; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD1]](s256)
+ ; RV64I-NEXT: $x10 = COPY [[TRUNC]](s64)
+ ; RV64I-NEXT: PseudoRET implicit $x10
+ %2 = trunc i256 %y to i64
+ ret i64 %2
+}
+
+define i64 @callee_256i_in_regs(i256 %x, i256 %y) {
; RV64I-LABEL: name: callee_256i_in_regs
; RV64I: bb.1 (%ir-block.0):
; RV64I-NEXT: liveins: $x10, $x11
>From 746dd4caafb433efa00495a8a9583d38e8446035 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 09:35:54 +0200
Subject: [PATCH 20/29] Work on alignments and use load later
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 10 ++++++++--
.../calling-conv-ilp32-ilp32f-ilp32d-common.ll | 12 ++++++------
.../calling-conv-lp64-lp64f-lp64d-common.ll | 6 +++---
3 files changed, 17 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 16faaa55be9ff..37d7e3b8b7a61 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -23,9 +23,11 @@
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
#include "llvm/Support/Alignment.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
@@ -846,7 +848,6 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// If the value is not on the stack, then dispatch the process of
// moving it to the correct place for the call to the rest of the code.
//if (!VA.isMemLoc()) {
- ArgReg = PointerToStackReg;
//}
// This value assign or load are needed here for the case, when the
// pointer to stack is passed, since there is no other case later that
@@ -860,6 +861,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// DstAlign);
// break;
//}
+ ArgReg = PointerToStackReg;
IndirectParameterPassingHandled = true;
}
@@ -878,7 +880,11 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// Finish the handling of indirect passing from the passers
// (OutgoingParameterHandler) side
if (IndirectParameterPassingHandled) {
- Handler.assignValueToAddress(ArgReg, StackAddr, MemTy, MPO, VA);
+ PointerType *PtrInAllocaTy =
+ PointerType::get(MIRBuilder.getContext(), AllocaAddressSpace);
+ Align AlignPtrInAlloca = DL.getPrefTypeAlign(PtrInAllocaTy);
+
+ MIRBuilder.buildStore(ArgReg, StackAddr, MPO, AlignPtrInAlloca);
break;
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index ec847e67eb533..96ae9bc6d472a 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -60,7 +60,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -92,7 +92,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -124,7 +124,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -245,7 +245,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -312,7 +312,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -379,7 +379,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (s32) into stack + 32, align 16)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index 439417d90ddff..871386f7f8e26 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -34,7 +34,7 @@ define i32 @caller_256i_in_regs_stack( ) {
; LP64-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; LP64-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; LP64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; LP64-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16)
+ ; LP64-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
; LP64-NEXT: $x10 = COPY [[C]](s64)
; LP64-NEXT: $x11 = COPY [[C1]](s64)
; LP64-NEXT: $x12 = COPY [[C2]](s64)
@@ -68,7 +68,7 @@ define i32 @caller_256i_in_regs_stack( ) {
; LP64F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; LP64F-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; LP64F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; LP64F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16)
+ ; LP64F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
; LP64F-NEXT: $x10 = COPY [[C]](s64)
; LP64F-NEXT: $x11 = COPY [[C1]](s64)
; LP64F-NEXT: $x12 = COPY [[C2]](s64)
@@ -102,7 +102,7 @@ define i32 @caller_256i_in_regs_stack( ) {
; LP64D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; LP64D-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; LP64D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; LP64D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (s64) into stack, align 16)
+ ; LP64D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
; LP64D-NEXT: $x10 = COPY [[C]](s64)
; LP64D-NEXT: $x11 = COPY [[C1]](s64)
; LP64D-NEXT: $x12 = COPY [[C2]](s64)
>From 7ac313a535808b45b2bf4f11fb48ddc4c2f790fa Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 09:36:26 +0200
Subject: [PATCH 21/29] Remove comments
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 16 ----------------
1 file changed, 16 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 37d7e3b8b7a61..7d956fb21ecb6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -845,22 +845,6 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
DstAlign);
- // If the value is not on the stack, then dispatch the process of
- // moving it to the correct place for the call to the rest of the code.
- //if (!VA.isMemLoc()) {
- //}
- // This value assign or load are needed here for the case, when the
- // pointer to stack is passed, since there is no other case later that
- // would handle this.
- //if (VA.isMemLoc()) {
- // LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
- // MachinePointerInfo MPO;
- // auto PassedStackAddress = Handler.getStackAddress(
- // MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
- // MIRBuilder.buildStore(PointerToStackReg, PassedStackAddress, DstMPO,
- // DstAlign);
- // break;
- //}
ArgReg = PointerToStackReg;
IndirectParameterPassingHandled = true;
}
>From 20ec107921f2580b4da4fa435d30508a0c063410 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 13:20:17 +0200
Subject: [PATCH 22/29] Use inferAlignFromPtrInfo
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 9 +++------
.../calling-conv-ilp32-ilp32f-ilp32d-common.ll | 12 ++++++------
.../calling-conv-lp64-lp64f-lp64d-common.ll | 6 +++---
3 files changed, 12 insertions(+), 15 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 7d956fb21ecb6..ab0c181af49e3 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -857,18 +857,15 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// store. We may need to adjust the offset for big endian targets.
LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
- MachinePointerInfo MPO;
+ MachinePointerInfo MPO(ArgReg);
Register StackAddr = Handler.getStackAddress(
MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
// Finish the handling of indirect passing from the passers
// (OutgoingParameterHandler) side
if (IndirectParameterPassingHandled) {
- PointerType *PtrInAllocaTy =
- PointerType::get(MIRBuilder.getContext(), AllocaAddressSpace);
- Align AlignPtrInAlloca = DL.getPrefTypeAlign(PtrInAllocaTy);
-
- MIRBuilder.buildStore(ArgReg, StackAddr, MPO, AlignPtrInAlloca);
+ Align AlignPtr = inferAlignFromPtrInfo(MF, MPO);
+ MIRBuilder.buildStore(ArgReg, StackAddr, MPO, AlignPtr);
break;
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 96ae9bc6d472a..28d6c0de9748d 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -60,7 +60,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -92,7 +92,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -124,7 +124,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -245,7 +245,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 1)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -312,7 +312,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 1)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -379,7 +379,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 1)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index 871386f7f8e26..ab3a8fdc560c9 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -34,7 +34,7 @@ define i32 @caller_256i_in_regs_stack( ) {
; LP64-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; LP64-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; LP64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; LP64-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
+ ; LP64-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
; LP64-NEXT: $x10 = COPY [[C]](s64)
; LP64-NEXT: $x11 = COPY [[C1]](s64)
; LP64-NEXT: $x12 = COPY [[C2]](s64)
@@ -68,7 +68,7 @@ define i32 @caller_256i_in_regs_stack( ) {
; LP64F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; LP64F-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; LP64F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; LP64F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
+ ; LP64F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
; LP64F-NEXT: $x10 = COPY [[C]](s64)
; LP64F-NEXT: $x11 = COPY [[C1]](s64)
; LP64F-NEXT: $x12 = COPY [[C2]](s64)
@@ -102,7 +102,7 @@ define i32 @caller_256i_in_regs_stack( ) {
; LP64D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; LP64D-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; LP64D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; LP64D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack)
+ ; LP64D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
; LP64D-NEXT: $x10 = COPY [[C]](s64)
; LP64D-NEXT: $x11 = COPY [[C1]](s64)
; LP64D-NEXT: $x12 = COPY [[C2]](s64)
>From 42cb4a49a73a29c7bafa822e21a2eb43b9c4400c Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 14:39:15 +0200
Subject: [PATCH 23/29] Use the handler's function
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 6 +++---
.../calling-conv-ilp32-ilp32f-ilp32d-common.ll | 12 ++++++------
.../calling-conv-lp64-lp64f-lp64d-common.ll | 6 +++---
3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index ab0c181af49e3..8d205e378e0b7 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -863,9 +863,9 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// Finish the handling of indirect passing from the passers
// (OutgoingParameterHandler) side
- if (IndirectParameterPassingHandled) {
- Align AlignPtr = inferAlignFromPtrInfo(MF, MPO);
- MIRBuilder.buildStore(ArgReg, StackAddr, MPO, AlignPtr);
+
+ if (IndirectParameterPassingHandled) {
+ Handler.assignValueToAddress(ArgReg, StackAddr, PointerTy, MPO, VA);
break;
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 28d6c0de9748d..e050dbdf9b90d 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -60,7 +60,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -92,7 +92,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32F-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -124,7 +124,7 @@ define i32 @caller_128i_in_regs_stack_fst( ) {
; ILP32D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; ILP32D-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; ILP32D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
@@ -245,7 +245,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 1)
+ ; ILP32-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 16)
; ILP32-NEXT: $x10 = COPY [[UV]](s32)
; ILP32-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32-NEXT: $x12 = COPY [[UV2]](s32)
@@ -312,7 +312,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32F-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32F-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32F-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 1)
+ ; ILP32F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 16)
; ILP32F-NEXT: $x10 = COPY [[UV]](s32)
; ILP32F-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32F-NEXT: $x12 = COPY [[UV2]](s32)
@@ -379,7 +379,7 @@ define i32 @caller_128i_in_regs_stack() {
; ILP32D-NEXT: G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
; ILP32D-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
; ILP32D-NEXT: [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
- ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 1)
+ ; ILP32D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 16)
; ILP32D-NEXT: $x10 = COPY [[UV]](s32)
; ILP32D-NEXT: $x11 = COPY [[UV1]](s32)
; ILP32D-NEXT: $x12 = COPY [[UV2]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index ab3a8fdc560c9..e517b019c9c0a 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -34,7 +34,7 @@ define i32 @caller_256i_in_regs_stack( ) {
; LP64-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; LP64-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; LP64-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; LP64-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+ ; LP64-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
; LP64-NEXT: $x10 = COPY [[C]](s64)
; LP64-NEXT: $x11 = COPY [[C1]](s64)
; LP64-NEXT: $x12 = COPY [[C2]](s64)
@@ -68,7 +68,7 @@ define i32 @caller_256i_in_regs_stack( ) {
; LP64F-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; LP64F-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; LP64F-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; LP64F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+ ; LP64F-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
; LP64F-NEXT: $x10 = COPY [[C]](s64)
; LP64F-NEXT: $x11 = COPY [[C1]](s64)
; LP64F-NEXT: $x12 = COPY [[C2]](s64)
@@ -102,7 +102,7 @@ define i32 @caller_256i_in_regs_stack( ) {
; LP64D-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; LP64D-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; LP64D-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
- ; LP64D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 1)
+ ; LP64D-NEXT: G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
; LP64D-NEXT: $x10 = COPY [[C]](s64)
; LP64D-NEXT: $x11 = COPY [[C1]](s64)
; LP64D-NEXT: $x12 = COPY [[C2]](s64)
>From 31150f108e865fbd3fc34f808bca369cab96b8b3 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 14:43:40 +0200
Subject: [PATCH 24/29] Remove redundant line break
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 8d205e378e0b7..606b50163e8bb 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -863,8 +863,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// Finish the handling of indirect passing from the passers
// (OutgoingParameterHandler) side
-
- if (IndirectParameterPassingHandled) {
+ if (IndirectParameterPassingHandled) {
Handler.assignValueToAddress(ArgReg, StackAddr, PointerTy, MPO, VA);
break;
}
>From 1a68f0f2ffea0413f64731b32631e324cc6148b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Spaits?= <gaborspaits1 at gmail.com>
Date: Mon, 17 Jun 2024 15:59:48 +0200
Subject: [PATCH 25/29] Update llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
Co-authored-by: Sergei Barannikov <barannikov88 at gmail.com>
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 606b50163e8bb..24f4542a3448e 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -862,7 +862,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
// Finish the handling of indirect passing from the passers
- // (OutgoingParameterHandler) side
+ // (OutgoingParameterHandler) side.
if (IndirectParameterPassingHandled) {
Handler.assignValueToAddress(ArgReg, StackAddr, PointerTy, MPO, VA);
break;
>From f911c500dc24950899eaea8f6aa8746da56bcb47 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 16:10:56 +0200
Subject: [PATCH 26/29] Remove redundant branch
---
llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp | 4 +---
1 file changed, 1 insertion(+), 3 deletions(-)
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 180e238a0a978..86ce007343b32 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -89,9 +89,7 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
const MachinePointerInfo &MPO,
const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
- uint64_t Offset = 0;
- if (VA.isMemLoc())
- Offset = VA.getLocMemOffset();
+ uint64_t Offset = VA.getLocMemOffset();
// TODO: Move StackAlignment to subtarget and share with FrameLowering.
auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
>From 5e57688a151e7e4a2136c389b8317d24474c8db4 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 16:42:23 +0200
Subject: [PATCH 27/29] Keep the original semantics as much as possible
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 17 +++++++----------
1 file changed, 7 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 24f4542a3448e..cf9d02fccb5d6 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -908,18 +908,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
DstMPO, DstAlign, SrcMPO, SrcAlign,
MemSize, VA);
}
- // QUESTION: How to keep this assert with the new if then else
- // structured code?
- // assert(!VA.needsCustom() && "custom loc should have been handled
- // already");
-
} else if (i == 0 && !ThisReturnRegs.empty() &&
Handler.isIncomingArgumentHandler() &&
- isTypeIsValidForThisReturn(ValVT) && VA.isRegLoc()) {
+ isTypeIsValidForThisReturn(ValVT))
Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
- } else if (Handler.isIncomingArgumentHandler() && VA.isRegLoc()) {
+ else if (Handler.isIncomingArgumentHandler())
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- } else if (VA.isRegLoc()) {
+ else {
DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
});
@@ -947,10 +942,12 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
break;
}
+ // Now that all pieces have been assigned, re-pack the register typed
+ // values into the original value typed registers.
if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
!IndirectParameterPassingHandled) {
- // Now that all pieces have been assigned, re-pack the register typed
- // values into the original value typed registers.
+ // Merge the split registers into the expected larger result vregs of
+ // the original call.
buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
LocTy, Args[i].Flags[0]);
}
>From 958e3bfbf05350e14e2e0381318584aae3b4d0cd Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 17:24:25 +0200
Subject: [PATCH 28/29] Modify some MPOs
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 6 +++---
.../calling-conv-ilp32-ilp32f-ilp32d-common.ll | 12 ++++++------
.../calling-conv-lp64-lp64f-lp64d-common.ll | 6 +++---
3 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index cf9d02fccb5d6..a46796be78cd9 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -857,7 +857,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
// store. We may need to adjust the offset for big endian targets.
LLT MemTy = Handler.getStackValueStoreType(DL, VA, Flags);
- MachinePointerInfo MPO(ArgReg);
+ MachinePointerInfo MPO;
Register StackAddr = Handler.getStackAddress(
MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
@@ -926,13 +926,13 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
Handler.isIncomingArgumentHandler()) {
Align Alignment = DL.getABITypeAlign(Args[i].Ty);
- MachinePointerInfo DstMPO;
+ MachinePointerInfo MPO(Args[i].Regs[0]);
// Since we are doing indirect parameter passing, we know that the value
// in the temporary register is not the value passed to the function,
// but rather a pointer to that value. Let's load that value into the
// virtual register where the parameter should go.
- MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], DstMPO,
+ MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], MPO,
Alignment);
IndirectParameterPassingHandled = true;
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index e050dbdf9b90d..d65231ba35b1f 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -35,7 +35,7 @@ define i64 @callee_128i_in_regs_stack_fst(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i1
; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16)
- ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[LOAD]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[LOAD]](p0) :: (load (s128), align 8, addrspace 2147483661)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD1]](s128)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
@@ -185,7 +185,7 @@ define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x
; RV32I-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
; RV32I-NEXT: [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; RV32I-NEXT: [[LOAD8:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX8]](p0) :: (load (s32) from %fixed-stack.0, align 16)
- ; RV32I-NEXT: [[LOAD9:%[0-9]+]]:_(s128) = G_LOAD [[LOAD8]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[LOAD9:%[0-9]+]]:_(s128) = G_LOAD [[LOAD8]](p0) :: (load (s128), align 8, addrspace 2147483681)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD9]](s128)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
@@ -408,9 +408,9 @@ define i64 @callee_128i_in_regs(i128 %x, i128 %y ) {
; RV32I-NEXT: liveins: $x10, $x11
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
- ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 8, addrspace 2147483650)
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
- ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 8)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 8, addrspace 2147483651)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
@@ -492,9 +492,9 @@ define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
; RV32I-NEXT: liveins: $x10, $x11
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
- ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 8)
+ ; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 8, addrspace 2147483650)
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
- ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 8)
+ ; RV32I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 8, addrspace 2147483651)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index e517b019c9c0a..0b0110de65a81 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -139,7 +139,7 @@ define i64 @callee_256i_in_regs_stack(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x
; RV64I-NEXT: [[COPY7:%[0-9]+]]:_(s64) = COPY $x17
; RV64I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %fixed-stack.0, align 16)
- ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[LOAD]](p0) :: (load (s256), align 16)
+ ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[LOAD]](p0) :: (load (s256), align 16, addrspace 2147483657)
; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD1]](s256)
; RV64I-NEXT: $x10 = COPY [[TRUNC]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
@@ -154,9 +154,9 @@ define i64 @callee_256i_in_regs(i256 %x, i256 %y) {
; RV64I-NEXT: liveins: $x10, $x11
; RV64I-NEXT: {{ $}}
; RV64I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x10
- ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 16)
+ ; RV64I-NEXT: [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 16, addrspace 2147483650)
; RV64I-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
- ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 16)
+ ; RV64I-NEXT: [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 16, addrspace 2147483651)
; RV64I-NEXT: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
; RV64I-NEXT: $x10 = COPY [[TRUNC]](s64)
; RV64I-NEXT: PseudoRET implicit $x10
>From f7d6974d2872155c4317120726a38fda07e0a297 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 18:33:09 +0200
Subject: [PATCH 29/29] Format
---
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 16 ++++++----------
.../lib/Target/RISCV/GISel/RISCVCallLowering.cpp | 7 ++++---
2 files changed, 10 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index a46796be78cd9..46a78bafff7a2 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -17,20 +17,14 @@
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
-#include "llvm/IR/Type.h"
-#include "llvm/Support/Alignment.h"
#include "llvm/Target/TargetMachine.h"
-#include <cassert>
#define DEBUG_TYPE "call-lowering"
@@ -167,8 +161,9 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, const CallBase &CB,
// callee must be in the same TU and therefore we can direct-call it without
// worrying about it being out of range.
Info.Callee = MachineOperand::CreateGA(cast<GlobalValue>(CalleeV), 0);
- } else
+ } else {
Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
+ }
Register ReturnHintAlignReg;
Align ReturnHintAlign;
@@ -910,11 +905,12 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
}
} else if (i == 0 && !ThisReturnRegs.empty() &&
Handler.isIncomingArgumentHandler() &&
- isTypeIsValidForThisReturn(ValVT))
+ isTypeIsValidForThisReturn(ValVT)) {
+
Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
- else if (Handler.isIncomingArgumentHandler())
+ } else if (Handler.isIncomingArgumentHandler()) {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
- else {
+ } else {
DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
});
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 86ce007343b32..357ed0fc7b567 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -89,11 +89,12 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {
const MachinePointerInfo &MPO,
const CCValAssign &VA) override {
MachineFunction &MF = MIRBuilder.getMF();
- uint64_t Offset = VA.getLocMemOffset();
+ uint64_t LocMemOffset = VA.getLocMemOffset();
// TODO: Move StackAlignment to subtarget and share with FrameLowering.
- auto *MMO = MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
- commonAlignment(Align(16), Offset));
+ auto *MMO =
+ MF.getMachineMemOperand(MPO, MachineMemOperand::MOStore, MemTy,
+ commonAlignment(Align(16), LocMemOffset));
Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildStore(ExtReg, Addr, *MMO);
More information about the llvm-commits
mailing list