[llvm] [GISel][RISCV]Implement indirect parameter passing (PR #95429)

Gábor Spaits via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 17 11:01:35 PDT 2024


https://github.com/spaits updated https://github.com/llvm/llvm-project/pull/95429

>From 28f7db644af3a7c8e5cf1830a825c6fd05360d66 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 18:52:58 +0200
Subject: [PATCH 1/3] Squashed changes

---
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp  | 121 +++-
 .../Target/RISCV/GISel/RISCVCallLowering.cpp  |   4 +-
 ...calling-conv-ilp32-ilp32f-ilp32d-common.ll | 553 ++++++++++++++++++
 .../calling-conv-lp64-lp64f-lp64d-common.ll   | 210 +++++++
 4 files changed, 860 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 412cd0a21ad41..1b4e20f17c1b8 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -36,6 +36,7 @@ void CallLowering::anchor() {}
 static void
 addFlagsUsingAttrFn(ISD::ArgFlagsTy &Flags,
                     const std::function<bool(Attribute::AttrKind)> &AttrFn) {
+  // TODO: There are missing flags. Add them here.
   if (AttrFn(Attribute::SExt))
     Flags.setSExt();
   if (AttrFn(Attribute::ZExt))
@@ -743,6 +744,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
       continue;
     }
 
+    auto AllocaAddressSpace = MF.getDataLayout().getAllocaAddrSpace();
+
     const MVT ValVT = VA.getValVT();
     const MVT LocVT = VA.getLocVT();
 
@@ -751,6 +754,8 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
     const LLT NewLLT = Handler.isIncomingArgumentHandler() ? LocTy : ValTy;
     const EVT OrigVT = EVT::getEVT(Args[i].Ty);
     const LLT OrigTy = getLLTForType(*Args[i].Ty, DL);
+    const LLT PointerTy = LLT::pointer(
+        AllocaAddressSpace, DL.getPointerSizeInBits(AllocaAddressSpace));
 
     // Expected to be multiple regs for a single incoming arg.
     // There should be Regs.size() ArgLocs per argument.
@@ -765,23 +770,33 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
       // intermediate values.
       Args[i].Regs.resize(NumParts);
 
-      // For each split register, create and assign a vreg that will store
-      // the incoming component of the larger value. These will later be
-      // merged to form the final vreg.
-      for (unsigned Part = 0; Part < NumParts; ++Part)
-        Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+      // When we have indirect parameter passing we are receiving a pointer,
+      // that points to the actual value, so we need one "temporary" pointer.
+      if (VA.getLocInfo() == CCValAssign::Indirect &&
+          Args[i].Flags[0].isSplit()) {
+        if (Handler.isIncomingArgumentHandler())
+          Args[i].Regs[0] = MRI.createGenericVirtualRegister(PointerTy);
+      } else {
+        // For each split register, create and assign a vreg that will store
+        // the incoming component of the larger value. These will later be
+        // merged to form the final vreg.
+        for (unsigned Part = 0; Part < NumParts; ++Part)
+          Args[i].Regs[Part] = MRI.createGenericVirtualRegister(NewLLT);
+      }
     }
 
     assert((j + (NumParts - 1)) < ArgLocs.size() &&
            "Too many regs for number of args");
 
     // Coerce into outgoing value types before register assignment.
-    if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy) {
+    if (!Handler.isIncomingArgumentHandler() && OrigTy != ValTy &&
+        VA.getLocInfo() != CCValAssign::Indirect) {
       assert(Args[i].OrigRegs.size() == 1);
       buildCopyToRegs(MIRBuilder, Args[i].Regs, Args[i].OrigRegs[0], OrigTy,
                       ValTy, extendOpFromFlags(Args[i].Flags[0]));
     }
 
+    bool IndirectParameterPassingHandled = false;
     bool BigEndianPartOrdering = TLI->hasBigEndianPartOrdering(OrigVT, DL);
     for (unsigned Part = 0; Part < NumParts; ++Part) {
       Register ArgReg = Args[i].Regs[Part];
@@ -790,6 +805,41 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
       CCValAssign &VA = ArgLocs[j + Idx];
       const ISD::ArgFlagsTy Flags = Args[i].Flags[Part];
 
+      // We found an indirect parameter passing, and we have an
+      // OutgoingValueHandler as our handler (so we are at the call site or the
+      // return value). In this case, start the construction of the following
+      // GMIR, that is responsible for the preparation of indirect parameter
+      // passing:
+      //
+      // %1(indirectly passed type) = The value to pass
+      // %3(pointer) = G_FRAME_INDEX %stack.0
+      // G_STORE %1, %3 :: (store (s128), align 8)
+      //
+      // After this GMIR, the remaining part of the loop body will decide how
+      // to get the value to the caller and we break out of the loop.
+      if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
+          !Handler.isIncomingArgumentHandler()) {
+        Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
+        MachineFrameInfo &MFI = MF.getFrameInfo();
+        int FrameIdx = MFI.CreateStackObject(OrigTy.getScalarSizeInBits(),
+                                             StackAlign, false);
+
+        Register PointerToStackReg =
+            MIRBuilder.buildFrameIndex(PointerTy, FrameIdx).getReg(0);
+
+        MachinePointerInfo DstMPO =
+            MachinePointerInfo::getFixedStack(MF, FrameIdx);
+
+        Align DstAlign =
+            std::max(DL.getStackAlignment(), inferAlignFromPtrInfo(MF, DstMPO));
+
+        MIRBuilder.buildStore(Args[i].OrigRegs[Part], PointerToStackReg, DstMPO,
+                              DstAlign);
+
+        ArgReg = PointerToStackReg;
+        IndirectParameterPassingHandled = true;
+      }
+
       if (VA.isMemLoc() && !Flags.isByVal()) {
         // Individual pieces may have been spilled to the stack and others
         // passed in registers.
@@ -802,11 +852,17 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
         Register StackAddr = Handler.getStackAddress(
             MemTy.getSizeInBytes(), VA.getLocMemOffset(), MPO, Flags);
 
-        Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
-        continue;
-      }
+        // Finish the handling of indirect passing from the passers
+        // (OutgoingParameterHandler) side.
+        // This branch is needed, so the pointer to the value is loaded onto the
+        // stack.
+        if (IndirectParameterPassingHandled) {
+          Handler.assignValueToAddress(ArgReg, StackAddr, PointerTy, MPO, VA);
+          break;
+        }
 
-      if (VA.isMemLoc() && Flags.isByVal()) {
+        Handler.assignValueToAddress(Args[i], Part, StackAddr, MemTy, MPO, VA);
+      } else if (VA.isMemLoc() && Flags.isByVal()) {
         assert(Args[i].Regs.size() == 1 &&
                "didn't expect split byval pointer");
 
@@ -845,30 +901,45 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
                                      DstMPO, DstAlign, SrcMPO, SrcAlign,
                                      MemSize, VA);
         }
-        continue;
-      }
-
-      assert(!VA.needsCustom() && "custom loc should have been handled already");
-
-      if (i == 0 && !ThisReturnRegs.empty() &&
-          Handler.isIncomingArgumentHandler() &&
-          isTypeIsValidForThisReturn(ValVT)) {
+      } else if (i == 0 && !ThisReturnRegs.empty() &&
+                 Handler.isIncomingArgumentHandler() &&
+                 isTypeIsValidForThisReturn(ValVT)) {
         Handler.assignValueToReg(ArgReg, ThisReturnRegs[Part], VA);
-        continue;
-      }
-
-      if (Handler.isIncomingArgumentHandler())
+      } else if (Handler.isIncomingArgumentHandler()) {
         Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
-      else {
+      } else {
         DelayedOutgoingRegAssignments.emplace_back([=, &Handler]() {
           Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
         });
       }
+
+      // Finish the handling of indirect parameter passing when receiving
+      // the value (we are in the called function or the caller when receiving
+      // the return value).
+      if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
+          Handler.isIncomingArgumentHandler()) {
+        Align Alignment = DL.getABITypeAlign(Args[i].Ty);
+        MachinePointerInfo MPO(Args[i].Regs[0]);
+
+        // Since we are doing indirect parameter passing, we know that the value
+        // in the temporary register is not the value passed to the function,
+        // but rather a pointer to that value. Let's load that value into the
+        // virtual register where the parameter should go.
+        MIRBuilder.buildLoad(Args[i].OrigRegs[0], Args[i].Regs[0], MPO,
+                             Alignment);
+
+        IndirectParameterPassingHandled = true;
+      }
+
+      if (IndirectParameterPassingHandled)
+        break;
     }
 
     // Now that all pieces have been assigned, re-pack the register typed values
-    // into the original value typed registers.
-    if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT) {
+    // into the original value typed registers. This is only necessary, when
+    // the value was passed in multiple registers, not indirectly.
+    if (Handler.isIncomingArgumentHandler() && OrigVT != LocVT &&
+        !IndirectParameterPassingHandled) {
       // Merge the split registers into the expected larger result vregs of
       // the original call.
       buildCopyFromRegs(MIRBuilder, Args[i].OrigRegs, Args[i].Regs, OrigTy,
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
index 2bfee45852b20..b5f8715598f3a 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -341,10 +341,8 @@ static bool isLegalElementTypeForRVV(Type *EltTy,
 // TODO: Remove IsLowerArgs argument by adding support for vectors in lowerCall.
 static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget,
                                     bool IsLowerArgs = false) {
-  // TODO: Integers larger than 2*XLen are passed indirectly which is not
-  // supported yet.
   if (T->isIntegerTy())
-    return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
+    return true;
   if (T->isHalfTy() || T->isFloatTy() || T->isDoubleTy())
     return true;
   if (T->isPointerTy())
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
index 1a3489521af19..d65231ba35b1f 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32-ilp32f-ilp32d-common.ll
@@ -16,6 +16,559 @@
 ; Check that on RV32, i64 is passed in a pair of registers. Unlike
 ; the convention for varargs, this need not be an aligned pair.
 
+define i64 @callee_128i_in_regs_stack_fst(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i128 %y ) {
+  ; RV32I-LABEL: name: callee_128i_in_regs_stack_fst
+  ; RV32I: bb.1 (%ir-block.0):
+  ; RV32I-NEXT:   liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+  ; RV32I-NEXT: {{  $}}
+  ; RV32I-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+  ; RV32I-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+  ; RV32I-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; RV32I-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+  ; RV32I-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+  ; RV32I-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; RV32I-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+  ; RV32I-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+  ; RV32I-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; RV32I-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+  ; RV32I-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+  ; RV32I-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; RV32I-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; RV32I-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16)
+  ; RV32I-NEXT:   [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[LOAD]](p0) :: (load (s128), align 8, addrspace 2147483661)
+  ; RV32I-NEXT:   [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD1]](s128)
+  ; RV32I-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+  ; RV32I-NEXT:   $x10 = COPY [[UV]](s32)
+  ; RV32I-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; RV32I-NEXT:   PseudoRET implicit $x10, implicit $x11
+  %2 = trunc i128 %y to i64
+  ret i64 %2
+}
+
+define i32 @caller_128i_in_regs_stack_fst( ) {
+  ; ILP32-LABEL: name: caller_128i_in_regs_stack_fst
+  ; ILP32: bb.1 (%ir-block.0):
+  ; ILP32-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; ILP32-NEXT:   [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+  ; ILP32-NEXT:   ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+  ; ILP32-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32-NEXT:   [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32-NEXT:   [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32-NEXT:   G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
+  ; ILP32-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+  ; ILP32-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; ILP32-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+  ; ILP32-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
+  ; ILP32-NEXT:   $x10 = COPY [[UV]](s32)
+  ; ILP32-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; ILP32-NEXT:   $x12 = COPY [[UV2]](s32)
+  ; ILP32-NEXT:   $x13 = COPY [[UV3]](s32)
+  ; ILP32-NEXT:   $x14 = COPY [[UV4]](s32)
+  ; ILP32-NEXT:   $x15 = COPY [[UV5]](s32)
+  ; ILP32-NEXT:   $x16 = COPY [[UV6]](s32)
+  ; ILP32-NEXT:   $x17 = COPY [[UV7]](s32)
+  ; ILP32-NEXT:   PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack_fst, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+  ; ILP32-NEXT:   ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2
+  ; ILP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; ILP32-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32-NEXT:   PseudoRET implicit $x10
+  ;
+  ; ILP32F-LABEL: name: caller_128i_in_regs_stack_fst
+  ; ILP32F: bb.1 (%ir-block.0):
+  ; ILP32F-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; ILP32F-NEXT:   [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+  ; ILP32F-NEXT:   ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+  ; ILP32F-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32F-NEXT:   [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32F-NEXT:   [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32F-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32F-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32F-NEXT:   G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
+  ; ILP32F-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+  ; ILP32F-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; ILP32F-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+  ; ILP32F-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
+  ; ILP32F-NEXT:   $x10 = COPY [[UV]](s32)
+  ; ILP32F-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; ILP32F-NEXT:   $x12 = COPY [[UV2]](s32)
+  ; ILP32F-NEXT:   $x13 = COPY [[UV3]](s32)
+  ; ILP32F-NEXT:   $x14 = COPY [[UV4]](s32)
+  ; ILP32F-NEXT:   $x15 = COPY [[UV5]](s32)
+  ; ILP32F-NEXT:   $x16 = COPY [[UV6]](s32)
+  ; ILP32F-NEXT:   $x17 = COPY [[UV7]](s32)
+  ; ILP32F-NEXT:   PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack_fst, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+  ; ILP32F-NEXT:   ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2
+  ; ILP32F-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32F-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32F-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; ILP32F-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32F-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32F-NEXT:   PseudoRET implicit $x10
+  ;
+  ; ILP32D-LABEL: name: caller_128i_in_regs_stack_fst
+  ; ILP32D: bb.1 (%ir-block.0):
+  ; ILP32D-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; ILP32D-NEXT:   [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+  ; ILP32D-NEXT:   ADJCALLSTACKDOWN 4, 0, implicit-def $x2, implicit $x2
+  ; ILP32D-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32D-NEXT:   [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32D-NEXT:   [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32D-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32D-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32D-NEXT:   G_STORE [[C1]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
+  ; ILP32D-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+  ; ILP32D-NEXT:   [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; ILP32D-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C2]](s32)
+  ; ILP32D-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
+  ; ILP32D-NEXT:   $x10 = COPY [[UV]](s32)
+  ; ILP32D-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; ILP32D-NEXT:   $x12 = COPY [[UV2]](s32)
+  ; ILP32D-NEXT:   $x13 = COPY [[UV3]](s32)
+  ; ILP32D-NEXT:   $x14 = COPY [[UV4]](s32)
+  ; ILP32D-NEXT:   $x15 = COPY [[UV5]](s32)
+  ; ILP32D-NEXT:   $x16 = COPY [[UV6]](s32)
+  ; ILP32D-NEXT:   $x17 = COPY [[UV7]](s32)
+  ; ILP32D-NEXT:   PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack_fst, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+  ; ILP32D-NEXT:   ADJCALLSTACKUP 4, 0, implicit-def $x2, implicit $x2
+  ; ILP32D-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32D-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32D-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; ILP32D-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32D-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32D-NEXT:   PseudoRET implicit $x10
+  %1 = call i64 @callee_128i_in_regs_stack_fst(i64 1,i64 1, i64 1, i64 1, i128 2)
+  %2 = trunc i64 %1 to i32
+  ret i32 %2
+}
+
+define i64 @callee_128i_in_regs_stack(i64 %x0, i64 %x1, i64 %x2, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i128 %y ) {
+  ; RV32I-LABEL: name: callee_128i_in_regs_stack
+  ; RV32I: bb.1 (%ir-block.0):
+  ; RV32I-NEXT:   liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+  ; RV32I-NEXT: {{  $}}
+  ; RV32I-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+  ; RV32I-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+  ; RV32I-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; RV32I-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
+  ; RV32I-NEXT:   [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
+  ; RV32I-NEXT:   [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+  ; RV32I-NEXT:   [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
+  ; RV32I-NEXT:   [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
+  ; RV32I-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+  ; RV32I-NEXT:   [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
+  ; RV32I-NEXT:   [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
+  ; RV32I-NEXT:   [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+  ; RV32I-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.8
+  ; RV32I-NEXT:   [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.8, align 16)
+  ; RV32I-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.7
+  ; RV32I-NEXT:   [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (load (s32) from %fixed-stack.7)
+  ; RV32I-NEXT:   [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD]](s32), [[LOAD1]](s32)
+  ; RV32I-NEXT:   [[FRAME_INDEX2:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.6
+  ; RV32I-NEXT:   [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX2]](p0) :: (load (s32) from %fixed-stack.6, align 8)
+  ; RV32I-NEXT:   [[FRAME_INDEX3:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.5
+  ; RV32I-NEXT:   [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX3]](p0) :: (load (s32) from %fixed-stack.5)
+  ; RV32I-NEXT:   [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD2]](s32), [[LOAD3]](s32)
+  ; RV32I-NEXT:   [[FRAME_INDEX4:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.4
+  ; RV32I-NEXT:   [[LOAD4:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX4]](p0) :: (load (s32) from %fixed-stack.4, align 16)
+  ; RV32I-NEXT:   [[FRAME_INDEX5:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.3
+  ; RV32I-NEXT:   [[LOAD5:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX5]](p0) :: (load (s32) from %fixed-stack.3)
+  ; RV32I-NEXT:   [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD4]](s32), [[LOAD5]](s32)
+  ; RV32I-NEXT:   [[FRAME_INDEX6:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.2
+  ; RV32I-NEXT:   [[LOAD6:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX6]](p0) :: (load (s32) from %fixed-stack.2, align 8)
+  ; RV32I-NEXT:   [[FRAME_INDEX7:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
+  ; RV32I-NEXT:   [[LOAD7:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX7]](p0) :: (load (s32) from %fixed-stack.1)
+  ; RV32I-NEXT:   [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[LOAD6]](s32), [[LOAD7]](s32)
+  ; RV32I-NEXT:   [[FRAME_INDEX8:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; RV32I-NEXT:   [[LOAD8:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX8]](p0) :: (load (s32) from %fixed-stack.0, align 16)
+  ; RV32I-NEXT:   [[LOAD9:%[0-9]+]]:_(s128) = G_LOAD [[LOAD8]](p0) :: (load (s128), align 8, addrspace 2147483681)
+  ; RV32I-NEXT:   [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD9]](s128)
+  ; RV32I-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+  ; RV32I-NEXT:   $x10 = COPY [[UV]](s32)
+  ; RV32I-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; RV32I-NEXT:   PseudoRET implicit $x10, implicit $x11
+  %2 = trunc i128 %y to i64
+  ret i64 %2
+}
+
+define i32 @caller_128i_in_regs_stack() {
+  ; ILP32-LABEL: name: caller_128i_in_regs_stack
+  ; ILP32: bb.1 (%ir-block.0):
+  ; ILP32-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; ILP32-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; ILP32-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; ILP32-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+  ; ILP32-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; ILP32-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; ILP32-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+  ; ILP32-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; ILP32-NEXT:   [[C8:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+  ; ILP32-NEXT:   ADJCALLSTACKDOWN 36, 0, implicit-def $x2, implicit $x2
+  ; ILP32-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32-NEXT:   [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+  ; ILP32-NEXT:   [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
+  ; ILP32-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
+  ; ILP32-NEXT:   [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+  ; ILP32-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+  ; ILP32-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; ILP32-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s32)
+  ; ILP32-NEXT:   G_STORE [[UV8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+  ; ILP32-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; ILP32-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32)
+  ; ILP32-NEXT:   G_STORE [[UV9]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4)
+  ; ILP32-NEXT:   [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+  ; ILP32-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; ILP32-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32)
+  ; ILP32-NEXT:   G_STORE [[UV10]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 8)
+  ; ILP32-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; ILP32-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s32)
+  ; ILP32-NEXT:   G_STORE [[UV11]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12)
+  ; ILP32-NEXT:   [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+  ; ILP32-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; ILP32-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s32)
+  ; ILP32-NEXT:   G_STORE [[UV12]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 16)
+  ; ILP32-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; ILP32-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s32)
+  ; ILP32-NEXT:   G_STORE [[UV13]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20)
+  ; ILP32-NEXT:   [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+  ; ILP32-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+  ; ILP32-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s32)
+  ; ILP32-NEXT:   G_STORE [[UV14]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 8)
+  ; ILP32-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+  ; ILP32-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
+  ; ILP32-NEXT:   G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
+  ; ILP32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32-NEXT:   G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
+  ; ILP32-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+  ; ILP32-NEXT:   [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+  ; ILP32-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 16)
+  ; ILP32-NEXT:   $x10 = COPY [[UV]](s32)
+  ; ILP32-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; ILP32-NEXT:   $x12 = COPY [[UV2]](s32)
+  ; ILP32-NEXT:   $x13 = COPY [[UV3]](s32)
+  ; ILP32-NEXT:   $x14 = COPY [[UV4]](s32)
+  ; ILP32-NEXT:   $x15 = COPY [[UV5]](s32)
+  ; ILP32-NEXT:   $x16 = COPY [[UV6]](s32)
+  ; ILP32-NEXT:   $x17 = COPY [[UV7]](s32)
+  ; ILP32-NEXT:   PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+  ; ILP32-NEXT:   ADJCALLSTACKUP 36, 0, implicit-def $x2, implicit $x2
+  ; ILP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; ILP32-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32-NEXT:   PseudoRET implicit $x10
+  ;
+  ; ILP32F-LABEL: name: caller_128i_in_regs_stack
+  ; ILP32F: bb.1 (%ir-block.0):
+  ; ILP32F-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; ILP32F-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; ILP32F-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; ILP32F-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+  ; ILP32F-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; ILP32F-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; ILP32F-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+  ; ILP32F-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; ILP32F-NEXT:   [[C8:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+  ; ILP32F-NEXT:   ADJCALLSTACKDOWN 36, 0, implicit-def $x2, implicit $x2
+  ; ILP32F-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32F-NEXT:   [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+  ; ILP32F-NEXT:   [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
+  ; ILP32F-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
+  ; ILP32F-NEXT:   [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+  ; ILP32F-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+  ; ILP32F-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; ILP32F-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s32)
+  ; ILP32F-NEXT:   G_STORE [[UV8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+  ; ILP32F-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; ILP32F-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32)
+  ; ILP32F-NEXT:   G_STORE [[UV9]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4)
+  ; ILP32F-NEXT:   [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+  ; ILP32F-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; ILP32F-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32)
+  ; ILP32F-NEXT:   G_STORE [[UV10]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 8)
+  ; ILP32F-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; ILP32F-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s32)
+  ; ILP32F-NEXT:   G_STORE [[UV11]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12)
+  ; ILP32F-NEXT:   [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+  ; ILP32F-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; ILP32F-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s32)
+  ; ILP32F-NEXT:   G_STORE [[UV12]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 16)
+  ; ILP32F-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; ILP32F-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s32)
+  ; ILP32F-NEXT:   G_STORE [[UV13]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20)
+  ; ILP32F-NEXT:   [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+  ; ILP32F-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+  ; ILP32F-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s32)
+  ; ILP32F-NEXT:   G_STORE [[UV14]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 8)
+  ; ILP32F-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+  ; ILP32F-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
+  ; ILP32F-NEXT:   G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
+  ; ILP32F-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32F-NEXT:   G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
+  ; ILP32F-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+  ; ILP32F-NEXT:   [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+  ; ILP32F-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 16)
+  ; ILP32F-NEXT:   $x10 = COPY [[UV]](s32)
+  ; ILP32F-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; ILP32F-NEXT:   $x12 = COPY [[UV2]](s32)
+  ; ILP32F-NEXT:   $x13 = COPY [[UV3]](s32)
+  ; ILP32F-NEXT:   $x14 = COPY [[UV4]](s32)
+  ; ILP32F-NEXT:   $x15 = COPY [[UV5]](s32)
+  ; ILP32F-NEXT:   $x16 = COPY [[UV6]](s32)
+  ; ILP32F-NEXT:   $x17 = COPY [[UV7]](s32)
+  ; ILP32F-NEXT:   PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+  ; ILP32F-NEXT:   ADJCALLSTACKUP 36, 0, implicit-def $x2, implicit $x2
+  ; ILP32F-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32F-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32F-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; ILP32F-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32F-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32F-NEXT:   PseudoRET implicit $x10
+  ;
+  ; ILP32D-LABEL: name: caller_128i_in_regs_stack
+  ; ILP32D: bb.1 (%ir-block.0):
+  ; ILP32D-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; ILP32D-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; ILP32D-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; ILP32D-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+  ; ILP32D-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; ILP32D-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; ILP32D-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+  ; ILP32D-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; ILP32D-NEXT:   [[C8:%[0-9]+]]:_(s128) = G_CONSTANT i128 42
+  ; ILP32D-NEXT:   ADJCALLSTACKDOWN 36, 0, implicit-def $x2, implicit $x2
+  ; ILP32D-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
+  ; ILP32D-NEXT:   [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
+  ; ILP32D-NEXT:   [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
+  ; ILP32D-NEXT:   [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
+  ; ILP32D-NEXT:   [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C4]](s64)
+  ; ILP32D-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+  ; ILP32D-NEXT:   [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+  ; ILP32D-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s32)
+  ; ILP32D-NEXT:   G_STORE [[UV8]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
+  ; ILP32D-NEXT:   [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 4
+  ; ILP32D-NEXT:   [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C10]](s32)
+  ; ILP32D-NEXT:   G_STORE [[UV9]](s32), [[PTR_ADD1]](p0) :: (store (s32) into stack + 4)
+  ; ILP32D-NEXT:   [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C5]](s64)
+  ; ILP32D-NEXT:   [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
+  ; ILP32D-NEXT:   [[PTR_ADD2:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C11]](s32)
+  ; ILP32D-NEXT:   G_STORE [[UV10]](s32), [[PTR_ADD2]](p0) :: (store (s32) into stack + 8, align 8)
+  ; ILP32D-NEXT:   [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
+  ; ILP32D-NEXT:   [[PTR_ADD3:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C12]](s32)
+  ; ILP32D-NEXT:   G_STORE [[UV11]](s32), [[PTR_ADD3]](p0) :: (store (s32) into stack + 12)
+  ; ILP32D-NEXT:   [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C6]](s64)
+  ; ILP32D-NEXT:   [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+  ; ILP32D-NEXT:   [[PTR_ADD4:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C13]](s32)
+  ; ILP32D-NEXT:   G_STORE [[UV12]](s32), [[PTR_ADD4]](p0) :: (store (s32) into stack + 16, align 16)
+  ; ILP32D-NEXT:   [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
+  ; ILP32D-NEXT:   [[PTR_ADD5:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C14]](s32)
+  ; ILP32D-NEXT:   G_STORE [[UV13]](s32), [[PTR_ADD5]](p0) :: (store (s32) into stack + 20)
+  ; ILP32D-NEXT:   [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C7]](s64)
+  ; ILP32D-NEXT:   [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
+  ; ILP32D-NEXT:   [[PTR_ADD6:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C15]](s32)
+  ; ILP32D-NEXT:   G_STORE [[UV14]](s32), [[PTR_ADD6]](p0) :: (store (s32) into stack + 24, align 8)
+  ; ILP32D-NEXT:   [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 28
+  ; ILP32D-NEXT:   [[PTR_ADD7:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C16]](s32)
+  ; ILP32D-NEXT:   G_STORE [[UV15]](s32), [[PTR_ADD7]](p0) :: (store (s32) into stack + 28)
+  ; ILP32D-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32D-NEXT:   G_STORE [[C8]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
+  ; ILP32D-NEXT:   [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
+  ; ILP32D-NEXT:   [[PTR_ADD8:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C17]](s32)
+  ; ILP32D-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD8]](p0) :: (store (p0) into stack + 32, align 16)
+  ; ILP32D-NEXT:   $x10 = COPY [[UV]](s32)
+  ; ILP32D-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; ILP32D-NEXT:   $x12 = COPY [[UV2]](s32)
+  ; ILP32D-NEXT:   $x13 = COPY [[UV3]](s32)
+  ; ILP32D-NEXT:   $x14 = COPY [[UV4]](s32)
+  ; ILP32D-NEXT:   $x15 = COPY [[UV5]](s32)
+  ; ILP32D-NEXT:   $x16 = COPY [[UV6]](s32)
+  ; ILP32D-NEXT:   $x17 = COPY [[UV7]](s32)
+  ; ILP32D-NEXT:   PseudoCALL target-flags(riscv-call) @callee_128i_in_regs_stack, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10, implicit-def $x11
+  ; ILP32D-NEXT:   ADJCALLSTACKUP 36, 0, implicit-def $x2, implicit $x2
+  ; ILP32D-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32D-NEXT:   [[COPY2:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32D-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
+  ; ILP32D-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32D-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32D-NEXT:   PseudoRET implicit $x10
+  %1 = call i64 @callee_128i_in_regs_stack(i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i128 42)
+  %2 = trunc i64 %1 to i32
+  ret i32 %2
+}
+
+
+define i64 @callee_128i_in_regs(i128 %x, i128 %y ) {
+  ; RV32I-LABEL: name: callee_128i_in_regs
+  ; RV32I: bb.1 (%ir-block.0):
+  ; RV32I-NEXT:   liveins: $x10, $x11
+  ; RV32I-NEXT: {{  $}}
+  ; RV32I-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+  ; RV32I-NEXT:   [[LOAD:%[0-9]+]]:_(s128) = G_LOAD [[COPY]](p0) :: (load (s128), align 8, addrspace 2147483650)
+  ; RV32I-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+  ; RV32I-NEXT:   [[LOAD1:%[0-9]+]]:_(s128) = G_LOAD [[COPY1]](p0) :: (load (s128), align 8, addrspace 2147483651)
+  ; RV32I-NEXT:   [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s128)
+  ; RV32I-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+  ; RV32I-NEXT:   $x10 = COPY [[UV]](s32)
+  ; RV32I-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; RV32I-NEXT:   PseudoRET implicit $x10, implicit $x11
+  %2 = trunc i128 %x to i64
+  ret i64 %2
+}
+
+define i32 @caller_128i_in_regs( ) {
+  ; ILP32-LABEL: name: caller_128i_in_regs
+  ; ILP32: bb.1 (%ir-block.0):
+  ; ILP32-NEXT:   [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+  ; ILP32-NEXT:   [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+  ; ILP32-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32-NEXT:   G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
+  ; ILP32-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+  ; ILP32-NEXT:   G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1)
+  ; ILP32-NEXT:   $x10 = COPY [[FRAME_INDEX]](p0)
+  ; ILP32-NEXT:   $x11 = COPY [[FRAME_INDEX1]](p0)
+  ; ILP32-NEXT:   PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+  ; ILP32-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; ILP32-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32-NEXT:   PseudoRET implicit $x10
+  ;
+  ; ILP32F-LABEL: name: caller_128i_in_regs
+  ; ILP32F: bb.1 (%ir-block.0):
+  ; ILP32F-NEXT:   [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+  ; ILP32F-NEXT:   [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+  ; ILP32F-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32F-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32F-NEXT:   G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
+  ; ILP32F-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+  ; ILP32F-NEXT:   G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1)
+  ; ILP32F-NEXT:   $x10 = COPY [[FRAME_INDEX]](p0)
+  ; ILP32F-NEXT:   $x11 = COPY [[FRAME_INDEX1]](p0)
+  ; ILP32F-NEXT:   PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+  ; ILP32F-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32F-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32F-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32F-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; ILP32F-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32F-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32F-NEXT:   PseudoRET implicit $x10
+  ;
+  ; ILP32D-LABEL: name: caller_128i_in_regs
+  ; ILP32D: bb.1 (%ir-block.0):
+  ; ILP32D-NEXT:   [[C:%[0-9]+]]:_(s128) = G_CONSTANT i128 1
+  ; ILP32D-NEXT:   [[C1:%[0-9]+]]:_(s128) = G_CONSTANT i128 2
+  ; ILP32D-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32D-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32D-NEXT:   G_STORE [[C]](s128), [[FRAME_INDEX]](p0) :: (store (s128) into %stack.0)
+  ; ILP32D-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+  ; ILP32D-NEXT:   G_STORE [[C1]](s128), [[FRAME_INDEX1]](p0) :: (store (s128) into %stack.1)
+  ; ILP32D-NEXT:   $x10 = COPY [[FRAME_INDEX]](p0)
+  ; ILP32D-NEXT:   $x11 = COPY [[FRAME_INDEX1]](p0)
+  ; ILP32D-NEXT:   PseudoCALL target-flags(riscv-call) @callee_128i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+  ; ILP32D-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32D-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32D-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32D-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; ILP32D-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32D-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32D-NEXT:   PseudoRET implicit $x10
+  %1 = call i64 @callee_128i_in_regs(i128 1, i128 2)
+  %2 = trunc i64 %1 to i32
+  ret i32 %2
+}
+
+define i64 @callee_256i_in_regs(i256 %x, i256 %y ) {
+
+  ; RV32I-LABEL: name: callee_256i_in_regs
+  ; RV32I: bb.1 (%ir-block.0):
+  ; RV32I-NEXT:   liveins: $x10, $x11
+  ; RV32I-NEXT: {{  $}}
+  ; RV32I-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+  ; RV32I-NEXT:   [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 8, addrspace 2147483650)
+  ; RV32I-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+  ; RV32I-NEXT:   [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 8, addrspace 2147483651)
+  ; RV32I-NEXT:   [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
+  ; RV32I-NEXT:   [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s64)
+  ; RV32I-NEXT:   $x10 = COPY [[UV]](s32)
+  ; RV32I-NEXT:   $x11 = COPY [[UV1]](s32)
+  ; RV32I-NEXT:   PseudoRET implicit $x10, implicit $x11
+  %2 = trunc i256 %x to i64
+  ret i64 %2
+}
+
+define i32 @caller_256i_in_regs( ) {
+  ; ILP32-LABEL: name: caller_256i_in_regs
+  ; ILP32: bb.1 (%ir-block.0):
+  ; ILP32-NEXT:   [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+  ; ILP32-NEXT:   [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+  ; ILP32-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32-NEXT:   G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+  ; ILP32-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+  ; ILP32-NEXT:   G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
+  ; ILP32-NEXT:   $x10 = COPY [[FRAME_INDEX]](p0)
+  ; ILP32-NEXT:   $x11 = COPY [[FRAME_INDEX1]](p0)
+  ; ILP32-NEXT:   PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+  ; ILP32-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; ILP32-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32-NEXT:   PseudoRET implicit $x10
+  ;
+  ; ILP32F-LABEL: name: caller_256i_in_regs
+  ; ILP32F: bb.1 (%ir-block.0):
+  ; ILP32F-NEXT:   [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+  ; ILP32F-NEXT:   [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+  ; ILP32F-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32F-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32F-NEXT:   G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+  ; ILP32F-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+  ; ILP32F-NEXT:   G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
+  ; ILP32F-NEXT:   $x10 = COPY [[FRAME_INDEX]](p0)
+  ; ILP32F-NEXT:   $x11 = COPY [[FRAME_INDEX1]](p0)
+  ; ILP32F-NEXT:   PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+  ; ILP32F-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32F-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32F-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32F-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; ILP32F-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32F-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32F-NEXT:   PseudoRET implicit $x10
+  ;
+  ; ILP32D-LABEL: name: caller_256i_in_regs
+  ; ILP32D: bb.1 (%ir-block.0):
+  ; ILP32D-NEXT:   [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+  ; ILP32D-NEXT:   [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+  ; ILP32D-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32D-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; ILP32D-NEXT:   G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+  ; ILP32D-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+  ; ILP32D-NEXT:   G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
+  ; ILP32D-NEXT:   $x10 = COPY [[FRAME_INDEX]](p0)
+  ; ILP32D-NEXT:   $x11 = COPY [[FRAME_INDEX1]](p0)
+  ; ILP32D-NEXT:   PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10, implicit-def $x11
+  ; ILP32D-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+  ; ILP32D-NEXT:   [[COPY:%[0-9]+]]:_(s32) = COPY $x10
+  ; ILP32D-NEXT:   [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
+  ; ILP32D-NEXT:   [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+  ; ILP32D-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV]](s64)
+  ; ILP32D-NEXT:   $x10 = COPY [[TRUNC]](s32)
+  ; ILP32D-NEXT:   PseudoRET implicit $x10
+  %1 = call i64 @callee_256i_in_regs(i256 1, i256 2)
+  %2 = trunc i64 %1 to i32
+  ret i32 %2
+}
+
 define i32 @callee_i64_in_regs(i32 %a, i64 %b) nounwind {
   ; RV32I-LABEL: name: callee_i64_in_regs
   ; RV32I: bb.1 (%ir-block.0):
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
index b175b8d92e6c9..0b0110de65a81 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-lp64-lp64f-lp64d-common.ll
@@ -16,6 +16,216 @@
 ; Check that on RV64, i128 is passed in a pair of registers. Unlike
 ; the convention for varargs, this need not be an aligned pair.
 
+define i32 @caller_256i_in_regs_stack( ) {
+  ; LP64-LABEL: name: caller_256i_in_regs_stack
+  ; LP64: bb.1 (%ir-block.0):
+  ; LP64-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; LP64-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; LP64-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+  ; LP64-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; LP64-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; LP64-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+  ; LP64-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; LP64-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; LP64-NEXT:   [[C8:%[0-9]+]]:_(s256) = G_CONSTANT i256 42
+  ; LP64-NEXT:   ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2
+  ; LP64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; LP64-NEXT:   G_STORE [[C8]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+  ; LP64-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+  ; LP64-NEXT:   [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; LP64-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+  ; LP64-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
+  ; LP64-NEXT:   $x10 = COPY [[C]](s64)
+  ; LP64-NEXT:   $x11 = COPY [[C1]](s64)
+  ; LP64-NEXT:   $x12 = COPY [[C2]](s64)
+  ; LP64-NEXT:   $x13 = COPY [[C3]](s64)
+  ; LP64-NEXT:   $x14 = COPY [[C4]](s64)
+  ; LP64-NEXT:   $x15 = COPY [[C5]](s64)
+  ; LP64-NEXT:   $x16 = COPY [[C6]](s64)
+  ; LP64-NEXT:   $x17 = COPY [[C7]](s64)
+  ; LP64-NEXT:   PseudoCALL target-flags(riscv-call) @callee_256i_in_regs_stack, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10
+  ; LP64-NEXT:   ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2
+  ; LP64-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+  ; LP64-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+  ; LP64-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+  ; LP64-NEXT:   $x10 = COPY [[ANYEXT]](s64)
+  ; LP64-NEXT:   PseudoRET implicit $x10
+  ;
+  ; LP64F-LABEL: name: caller_256i_in_regs_stack
+  ; LP64F: bb.1 (%ir-block.0):
+  ; LP64F-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; LP64F-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; LP64F-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+  ; LP64F-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; LP64F-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; LP64F-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+  ; LP64F-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; LP64F-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; LP64F-NEXT:   [[C8:%[0-9]+]]:_(s256) = G_CONSTANT i256 42
+  ; LP64F-NEXT:   ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2
+  ; LP64F-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; LP64F-NEXT:   G_STORE [[C8]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+  ; LP64F-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+  ; LP64F-NEXT:   [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; LP64F-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+  ; LP64F-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
+  ; LP64F-NEXT:   $x10 = COPY [[C]](s64)
+  ; LP64F-NEXT:   $x11 = COPY [[C1]](s64)
+  ; LP64F-NEXT:   $x12 = COPY [[C2]](s64)
+  ; LP64F-NEXT:   $x13 = COPY [[C3]](s64)
+  ; LP64F-NEXT:   $x14 = COPY [[C4]](s64)
+  ; LP64F-NEXT:   $x15 = COPY [[C5]](s64)
+  ; LP64F-NEXT:   $x16 = COPY [[C6]](s64)
+  ; LP64F-NEXT:   $x17 = COPY [[C7]](s64)
+  ; LP64F-NEXT:   PseudoCALL target-flags(riscv-call) @callee_256i_in_regs_stack, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10
+  ; LP64F-NEXT:   ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2
+  ; LP64F-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+  ; LP64F-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+  ; LP64F-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+  ; LP64F-NEXT:   $x10 = COPY [[ANYEXT]](s64)
+  ; LP64F-NEXT:   PseudoRET implicit $x10
+  ;
+  ; LP64D-LABEL: name: caller_256i_in_regs_stack
+  ; LP64D: bb.1 (%ir-block.0):
+  ; LP64D-NEXT:   [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+  ; LP64D-NEXT:   [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+  ; LP64D-NEXT:   [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+  ; LP64D-NEXT:   [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+  ; LP64D-NEXT:   [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+  ; LP64D-NEXT:   [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+  ; LP64D-NEXT:   [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+  ; LP64D-NEXT:   [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
+  ; LP64D-NEXT:   [[C8:%[0-9]+]]:_(s256) = G_CONSTANT i256 42
+  ; LP64D-NEXT:   ADJCALLSTACKDOWN 8, 0, implicit-def $x2, implicit $x2
+  ; LP64D-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; LP64D-NEXT:   G_STORE [[C8]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+  ; LP64D-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x2
+  ; LP64D-NEXT:   [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+  ; LP64D-NEXT:   [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C9]](s64)
+  ; LP64D-NEXT:   G_STORE [[FRAME_INDEX]](p0), [[PTR_ADD]](p0) :: (store (p0) into stack, align 16)
+  ; LP64D-NEXT:   $x10 = COPY [[C]](s64)
+  ; LP64D-NEXT:   $x11 = COPY [[C1]](s64)
+  ; LP64D-NEXT:   $x12 = COPY [[C2]](s64)
+  ; LP64D-NEXT:   $x13 = COPY [[C3]](s64)
+  ; LP64D-NEXT:   $x14 = COPY [[C4]](s64)
+  ; LP64D-NEXT:   $x15 = COPY [[C5]](s64)
+  ; LP64D-NEXT:   $x16 = COPY [[C6]](s64)
+  ; LP64D-NEXT:   $x17 = COPY [[C7]](s64)
+  ; LP64D-NEXT:   PseudoCALL target-flags(riscv-call) @callee_256i_in_regs_stack, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10
+  ; LP64D-NEXT:   ADJCALLSTACKUP 8, 0, implicit-def $x2, implicit $x2
+  ; LP64D-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x10
+  ; LP64D-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64)
+  ; LP64D-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+  ; LP64D-NEXT:   $x10 = COPY [[ANYEXT]](s64)
+  ; LP64D-NEXT:   PseudoRET implicit $x10
+  %1 = call i64 @callee_256i_in_regs_stack(i64 1, i64 2, i64 3, i64 4,i64 5,i64 6,i64 7,i64 8, i256 42)
+  %2 = trunc i64 %1 to i32
+  ret i32 %2
+}
+
+
+define i64 @callee_256i_in_regs_stack(i64 %x1, i64 %x2, i64 %x3, i64 %x4, i64 %x5, i64 %x6, i64 %x7, i64 %x8, i256 %y) {
+  ; RV64I-LABEL: name: callee_256i_in_regs_stack
+  ; RV64I: bb.1 (%ir-block.0):
+  ; RV64I-NEXT:   liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
+  ; RV64I-NEXT: {{  $}}
+  ; RV64I-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+  ; RV64I-NEXT:   [[COPY1:%[0-9]+]]:_(s64) = COPY $x11
+  ; RV64I-NEXT:   [[COPY2:%[0-9]+]]:_(s64) = COPY $x12
+  ; RV64I-NEXT:   [[COPY3:%[0-9]+]]:_(s64) = COPY $x13
+  ; RV64I-NEXT:   [[COPY4:%[0-9]+]]:_(s64) = COPY $x14
+  ; RV64I-NEXT:   [[COPY5:%[0-9]+]]:_(s64) = COPY $x15
+  ; RV64I-NEXT:   [[COPY6:%[0-9]+]]:_(s64) = COPY $x16
+  ; RV64I-NEXT:   [[COPY7:%[0-9]+]]:_(s64) = COPY $x17
+  ; RV64I-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
+  ; RV64I-NEXT:   [[LOAD:%[0-9]+]]:_(p0) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s64) from %fixed-stack.0, align 16)
+  ; RV64I-NEXT:   [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[LOAD]](p0) :: (load (s256), align 16, addrspace 2147483657)
+  ; RV64I-NEXT:   [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD1]](s256)
+  ; RV64I-NEXT:   $x10 = COPY [[TRUNC]](s64)
+  ; RV64I-NEXT:   PseudoRET implicit $x10
+  %2 = trunc i256 %y to i64
+  ret i64 %2
+}
+
+
+define i64 @callee_256i_in_regs(i256 %x, i256 %y) {
+  ; RV64I-LABEL: name: callee_256i_in_regs
+  ; RV64I: bb.1 (%ir-block.0):
+  ; RV64I-NEXT:   liveins: $x10, $x11
+  ; RV64I-NEXT: {{  $}}
+  ; RV64I-NEXT:   [[COPY:%[0-9]+]]:_(p0) = COPY $x10
+  ; RV64I-NEXT:   [[LOAD:%[0-9]+]]:_(s256) = G_LOAD [[COPY]](p0) :: (load (s256), align 16, addrspace 2147483650)
+  ; RV64I-NEXT:   [[COPY1:%[0-9]+]]:_(p0) = COPY $x11
+  ; RV64I-NEXT:   [[LOAD1:%[0-9]+]]:_(s256) = G_LOAD [[COPY1]](p0) :: (load (s256), align 16, addrspace 2147483651)
+  ; RV64I-NEXT:   [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[LOAD]](s256)
+  ; RV64I-NEXT:   $x10 = COPY [[TRUNC]](s64)
+  ; RV64I-NEXT:   PseudoRET implicit $x10
+  %2 = trunc i256 %x to i64
+  ret i64 %2
+}
+
+define i32 @caller_256i_in_regs( ) {
+  ; LP64-LABEL: name: caller_256i_in_regs
+  ; LP64: bb.1 (%ir-block.0):
+  ; LP64-NEXT:   [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+  ; LP64-NEXT:   [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+  ; LP64-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+  ; LP64-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; LP64-NEXT:   G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+  ; LP64-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+  ; LP64-NEXT:   G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
+  ; LP64-NEXT:   $x10 = COPY [[FRAME_INDEX]](p0)
+  ; LP64-NEXT:   $x11 = COPY [[FRAME_INDEX1]](p0)
+  ; LP64-NEXT:   PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32_lp64, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+  ; LP64-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+  ; LP64-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+  ; LP64-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+  ; LP64-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+  ; LP64-NEXT:   $x10 = COPY [[ANYEXT]](s64)
+  ; LP64-NEXT:   PseudoRET implicit $x10
+  ;
+  ; LP64F-LABEL: name: caller_256i_in_regs
+  ; LP64F: bb.1 (%ir-block.0):
+  ; LP64F-NEXT:   [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+  ; LP64F-NEXT:   [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+  ; LP64F-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+  ; LP64F-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; LP64F-NEXT:   G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+  ; LP64F-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+  ; LP64F-NEXT:   G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
+  ; LP64F-NEXT:   $x10 = COPY [[FRAME_INDEX]](p0)
+  ; LP64F-NEXT:   $x11 = COPY [[FRAME_INDEX1]](p0)
+  ; LP64F-NEXT:   PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32f_lp64f, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+  ; LP64F-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+  ; LP64F-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+  ; LP64F-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+  ; LP64F-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+  ; LP64F-NEXT:   $x10 = COPY [[ANYEXT]](s64)
+  ; LP64F-NEXT:   PseudoRET implicit $x10
+  ;
+  ; LP64D-LABEL: name: caller_256i_in_regs
+  ; LP64D: bb.1 (%ir-block.0):
+  ; LP64D-NEXT:   [[C:%[0-9]+]]:_(s256) = G_CONSTANT i256 1
+  ; LP64D-NEXT:   [[C1:%[0-9]+]]:_(s256) = G_CONSTANT i256 2
+  ; LP64D-NEXT:   ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
+  ; LP64D-NEXT:   [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
+  ; LP64D-NEXT:   G_STORE [[C]](s256), [[FRAME_INDEX]](p0) :: (store (s256) into %stack.0, align 16)
+  ; LP64D-NEXT:   [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.1
+  ; LP64D-NEXT:   G_STORE [[C1]](s256), [[FRAME_INDEX1]](p0) :: (store (s256) into %stack.1, align 16)
+  ; LP64D-NEXT:   $x10 = COPY [[FRAME_INDEX]](p0)
+  ; LP64D-NEXT:   $x11 = COPY [[FRAME_INDEX1]](p0)
+  ; LP64D-NEXT:   PseudoCALL target-flags(riscv-call) @callee_256i_in_regs, csr_ilp32d_lp64d, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
+  ; LP64D-NEXT:   ADJCALLSTACKUP 0, 0, implicit-def $x2, implicit $x2
+  ; LP64D-NEXT:   [[COPY:%[0-9]+]]:_(s64) = COPY $x10
+  ; LP64D-NEXT:   [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64)
+  ; LP64D-NEXT:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[TRUNC]](s32)
+  ; LP64D-NEXT:   $x10 = COPY [[ANYEXT]](s64)
+  ; LP64D-NEXT:   PseudoRET implicit $x10
+  %1 = call i64 @callee_256i_in_regs(i256 1, i256 2)
+  %2 = trunc i64 %1 to i32
+  ret i32 %2
+}
+
 define i64 @callee_i128_in_regs(i64 %a, i128 %b) nounwind {
   ; RV64I-LABEL: name: callee_i128_in_regs
   ; RV64I: bb.1 (%ir-block.0):

>From 7e04413feb31eb9a32041af66900752e62f4f021 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 19:16:45 +0200
Subject: [PATCH 2/3] Do not require isSplit() for indirect parameter passing

---
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 1b4e20f17c1b8..c0aee6fe3ab47 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -817,7 +817,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
       //
       // After this GMIR, the remaining part of the loop body will decide how
       // to get the value to the caller and we break out of the loop.
-      if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
+      if (VA.getLocInfo() == CCValAssign::Indirect &&
           !Handler.isIncomingArgumentHandler()) {
         Align StackAlign = DL.getPrefTypeAlign(Args[i].Ty);
         MachineFrameInfo &MFI = MF.getFrameInfo();
@@ -916,7 +916,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
       // Finish the handling of indirect parameter passing when receiving
       // the value (we are in the called function or the caller when receiving
       // the return value).
-      if (VA.getLocInfo() == CCValAssign::Indirect && Flags.isSplit() &&
+      if (VA.getLocInfo() == CCValAssign::Indirect &&
           Handler.isIncomingArgumentHandler()) {
         Align Alignment = DL.getABITypeAlign(Args[i].Ty);
         MachinePointerInfo MPO(Args[i].Regs[0]);

>From 53dbee77ec37129a6d33390f9b3f7cfa8fc8e874 Mon Sep 17 00:00:00 2001
From: Gabor Spaits <Gabor.Spaits at hightec-rt.com>
Date: Mon, 17 Jun 2024 20:01:20 +0200
Subject: [PATCH 3/3] Remove redundant isSplit

---
 llvm/lib/CodeGen/GlobalISel/CallLowering.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index c0aee6fe3ab47..b3c89bd226cbb 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -772,8 +772,7 @@ bool CallLowering::handleAssignments(ValueHandler &Handler,
 
       // When we have indirect parameter passing we are receiving a pointer,
       // that points to the actual value, so we need one "temporary" pointer.
-      if (VA.getLocInfo() == CCValAssign::Indirect &&
-          Args[i].Flags[0].isSplit()) {
+      if (VA.getLocInfo() == CCValAssign::Indirect) {
         if (Handler.isIncomingArgumentHandler())
           Args[i].Regs[0] = MRI.createGenericVirtualRegister(PointerTy);
       } else {



More information about the llvm-commits mailing list