[llvm] [RISCV] Software guard direct calls in large code model (PR #109377)

Jesse Huang via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 19 23:15:13 PDT 2024


https://github.com/jaidTw updated https://github.com/llvm/llvm-project/pull/109377

>From 8308d47ddf720371bee36789b855c1d33a5f4080 Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Sun, 15 Sep 2024 23:15:14 +0800
Subject: [PATCH 1/2] [RISCV][MC] Set Ra for PseudoTAIL in if-else clause

---
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
index 75323632dd5333..12ee6705fc4366 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCCodeEmitter.cpp
@@ -125,11 +125,12 @@ void RISCVMCCodeEmitter::expandFunctionCall(const MCInst &MI,
   MCRegister Ra;
   if (MI.getOpcode() == RISCV::PseudoTAIL) {
     Func = MI.getOperand(0);
-    Ra = RISCV::X6;
     // For Zicfilp, PseudoTAIL should be expanded to a software guarded branch.
     // It means to use t2(x7) as rs1 of JALR to expand PseudoTAIL.
     if (STI.hasFeature(RISCV::FeatureStdExtZicfilp))
       Ra = RISCV::X7;
+    else
+      Ra = RISCV::X6;
   } else if (MI.getOpcode() == RISCV::PseudoCALLReg) {
     Func = MI.getOperand(1);
     Ra = MI.getOperand(0).getReg();

>From a5aaed1648391440ee7216b7afbfffaf5e71bc0f Mon Sep 17 00:00:00 2001
From: Jesse Huang <jesse.huang at sifive.com>
Date: Mon, 16 Sep 2024 02:57:20 +0800
Subject: [PATCH 2/2] [RISCV] Software-guard direct calls in large code model

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  27 +-
 llvm/lib/Target/RISCV/RISCVISelLowering.h   |   7 +-
 llvm/lib/Target/RISCV/RISCVInstrInfo.td     |  22 +-
 llvm/test/CodeGen/RISCV/calls.ll            | 510 +++++++++++++++-----
 llvm/test/CodeGen/RISCV/tail-calls.ll       | 272 ++++++++++-
 5 files changed, 713 insertions(+), 125 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c4458b14f36ece..b6d61aad639960 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19696,11 +19696,14 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
   // split it and then direct call can be matched by PseudoCALL.
+  bool CalleeIsLargeExternalSymbol = false;
   if (getTargetMachine().getCodeModel() == CodeModel::Large) {
     if (auto *S = dyn_cast<GlobalAddressSDNode>(Callee))
       Callee = getLargeGlobalAddress(S, DL, PtrVT, DAG);
-    else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+    else if (auto *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
       Callee = getLargeExternalSymbol(S, DL, PtrVT, DAG);
+      CalleeIsLargeExternalSymbol = true;
+    }
   } else if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
     const GlobalValue *GV = S->getGlobal();
     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, RISCVII::MO_CALL);
@@ -19736,16 +19739,32 @@ SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   // Emit the call.
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
 
+  // Use a software-guarded branch for non-indirect calls in the large code
+  // model. A tail call to an external symbol has a null CLI.CB, so we need
+  // another way (CalleeIsLargeExternalSymbol) to determine the callsite type.
+  bool NeedSWGuarded = false;
+  if (getTargetMachine().getCodeModel() == CodeModel::Large &&
+      Subtarget.hasStdExtZicfilp() &&
+      ((CLI.CB && !CLI.CB->isIndirectCall()) || CalleeIsLargeExternalSymbol))
+    NeedSWGuarded = true;
+
   if (IsTailCall) {
     MF.getFrameInfo().setHasTailCall();
-    SDValue Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
+    SDValue Ret;
+    if (NeedSWGuarded)
+      Ret = DAG.getNode(RISCVISD::SW_GUARDED_TAIL, DL, NodeTys, Ops);
+    else
+      Ret = DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
     if (CLI.CFIType)
       Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
     DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
     return Ret;
   }
 
-  Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
+  if (NeedSWGuarded)
+    Chain = DAG.getNode(RISCVISD::SW_GUARDED_CALL, DL, NodeTys, Ops);
+  else
+    Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
   if (CLI.CFIType)
     Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
@@ -20193,6 +20212,8 @@ const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
   NODE_NAME_CASE(CZERO_EQZ)
   NODE_NAME_CASE(CZERO_NEZ)
   NODE_NAME_CASE(SW_GUARDED_BRIND)
+  NODE_NAME_CASE(SW_GUARDED_CALL)
+  NODE_NAME_CASE(SW_GUARDED_TAIL)
   NODE_NAME_CASE(TUPLE_INSERT)
   NODE_NAME_CASE(TUPLE_EXTRACT)
   NODE_NAME_CASE(SF_VC_XV_SE)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index ceb9d499002846..05581552ab6041 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -411,9 +411,12 @@ enum NodeType : unsigned {
   CZERO_EQZ, // vt.maskc for XVentanaCondOps.
   CZERO_NEZ, // vt.maskcn for XVentanaCondOps.
 
-  /// Software guarded BRIND node. Operand 0 is the chain operand and
-  /// operand 1 is the target address.
+  // Software guarded BRIND node. Operand 0 is the chain operand and
+  // operand 1 is the target address.
   SW_GUARDED_BRIND,
+  // Software guarded call and tail-call nodes for the large code model.
+  SW_GUARDED_CALL,
+  SW_GUARDED_TAIL,
 
   SF_VC_XV_SE,
   SF_VC_IV_SE,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index fe5623e2920e22..c4e192e5b35790 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -57,6 +57,9 @@ def callseq_end   : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd,
 def riscv_call      : SDNode<"RISCVISD::CALL", SDT_RISCVCall,
                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                               SDNPVariadic]>;
+def riscv_sw_guarded_call : SDNode<"RISCVISD::SW_GUARDED_CALL", SDT_RISCVCall,
+                                   [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                                    SDNPVariadic]>;
 def riscv_ret_glue  : SDNode<"RISCVISD::RET_GLUE", SDTNone,
                              [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
 def riscv_sret_glue : SDNode<"RISCVISD::SRET_GLUE", SDTNone,
@@ -69,6 +72,9 @@ def riscv_brcc      : SDNode<"RISCVISD::BR_CC", SDT_RISCVBrCC,
 def riscv_tail      : SDNode<"RISCVISD::TAIL", SDT_RISCVCall,
                              [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                               SDNPVariadic]>;
+def riscv_sw_guarded_tail : SDNode<"RISCVISD::SW_GUARDED_TAIL", SDT_RISCVCall,
+                                   [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                                    SDNPVariadic]>;
 def riscv_sw_guarded_brind : SDNode<"RISCVISD::SW_GUARDED_BRIND",
                                     SDTBrind, [SDNPHasChain]>;
 def riscv_sllw      : SDNode<"RISCVISD::SLLW", SDT_RISCVIntBinOpW>;
@@ -1555,10 +1561,15 @@ let Predicates = [NoStdExtZicfilp] in
 def PseudoCALLIndirect : Pseudo<(outs), (ins GPRJALR:$rs1),
                                 [(riscv_call GPRJALR:$rs1)]>,
                          PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;
-let Predicates = [HasStdExtZicfilp] in
+let Predicates = [HasStdExtZicfilp] in {
 def PseudoCALLIndirectNonX7 : Pseudo<(outs), (ins GPRJALRNonX7:$rs1),
-                                     [(riscv_call GPRJALRNonX7:$rs1)]>,
+                                    [(riscv_call GPRJALRNonX7:$rs1)]>,
                               PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;
+// For the large code model, non-indirect calls can be software-guarded.
+def PseudoCALLIndirectX7 : Pseudo<(outs), (ins GPRX7:$rs1),
+                                  [(riscv_sw_guarded_call GPRX7:$rs1)]>,
+                           PseudoInstExpansion<(JALR X1, GPR:$rs1, 0)>;
+}
 }
 
 let isBarrier = 1, isReturn = 1, isTerminator = 1 in
@@ -1579,10 +1590,15 @@ let Predicates = [NoStdExtZicfilp] in
 def PseudoTAILIndirect : Pseudo<(outs), (ins GPRTC:$rs1),
                                 [(riscv_tail GPRTC:$rs1)]>,
                          PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>;
-let Predicates = [HasStdExtZicfilp] in
+let Predicates = [HasStdExtZicfilp] in {
 def PseudoTAILIndirectNonX7 : Pseudo<(outs), (ins GPRTCNonX7:$rs1),
                                      [(riscv_tail GPRTCNonX7:$rs1)]>,
                               PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>;
+// For the large code model, non-indirect calls can be software-guarded.
+def PseudoTAILIndirectX7 : Pseudo<(outs), (ins GPRX7:$rs1),
+                                  [(riscv_sw_guarded_tail GPRX7:$rs1)]>,
+                           PseudoInstExpansion<(JALR X0, GPR:$rs1, 0)>;
+}
 }
 
 def : Pat<(riscv_tail (iPTR tglobaladdr:$dst)),
diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll
index 598a026fb95526..48dfe453664a90 100644
--- a/llvm/test/CodeGen/RISCV/calls.ll
+++ b/llvm/test/CodeGen/RISCV/calls.ll
@@ -11,18 +11,29 @@
 ; RUN:   | FileCheck -check-prefix=RV64I-MEDIUM %s
 ; RUN: llc -code-model=large -mtriple=riscv64 -verify-machineinstrs < %s \
 ; RUN:   | FileCheck -check-prefix=RV64I-LARGE %s
+; RUN: llc -code-model=large -mtriple=riscv64 -mattr=experimental-zicfilp -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefix=RV64I-LARGE-ZICFILP %s
 
 declare i32 @external_function(i32)
 
 define i32 @test_call_external(i32 %a) nounwind {
-; CHECK-LABEL: test_call_external:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    call external_function
-; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32I-LABEL: test_call_external:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call external_function
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_external:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    call external_function
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: test_call_external:
 ; RV64I:       # %bb.0:
@@ -62,6 +73,19 @@ define i32 @test_call_external(i32 %a) nounwind {
 ; RV64I-LARGE-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LARGE-NEXT:    addi sp, sp, 16
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: test_call_external:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; RV64I-LARGE-ZICFILP-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:  .Lpcrel_hi0:
+; RV64I-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI0_0)
+; RV64I-LARGE-ZICFILP-NEXT:    ld t2, %pcrel_lo(.Lpcrel_hi0)(a1)
+; RV64I-LARGE-ZICFILP-NEXT:    jalr t2
+; RV64I-LARGE-ZICFILP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %1 = call i32 @external_function(i32 %a)
   ret i32 %1
 }
@@ -69,14 +93,23 @@ define i32 @test_call_external(i32 %a) nounwind {
 declare dso_local i32 @dso_local_function(i32)
 
 define i32 @test_call_dso_local(i32 %a) nounwind {
-; CHECK-LABEL: test_call_dso_local:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    call dso_local_function
-; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32I-LABEL: test_call_dso_local:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call dso_local_function
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_dso_local:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    call dso_local_function
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: test_call_dso_local:
 ; RV64I:       # %bb.0:
@@ -116,15 +149,33 @@ define i32 @test_call_dso_local(i32 %a) nounwind {
 ; RV64I-LARGE-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LARGE-NEXT:    addi sp, sp, 16
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: test_call_dso_local:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; RV64I-LARGE-ZICFILP-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:  .Lpcrel_hi1:
+; RV64I-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI1_0)
+; RV64I-LARGE-ZICFILP-NEXT:    ld t2, %pcrel_lo(.Lpcrel_hi1)(a1)
+; RV64I-LARGE-ZICFILP-NEXT:    jalr t2
+; RV64I-LARGE-ZICFILP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %1 = call i32 @dso_local_function(i32 %a)
   ret i32 %1
 }
 
 define i32 @defined_function(i32 %a) nounwind {
-; CHECK-LABEL: defined_function:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: defined_function:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi a0, a0, 1
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: defined_function:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi a0, a0, 1
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: defined_function:
 ; RV64I:       # %bb.0:
@@ -145,19 +196,34 @@ define i32 @defined_function(i32 %a) nounwind {
 ; RV64I-LARGE:       # %bb.0:
 ; RV64I-LARGE-NEXT:    addiw a0, a0, 1
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: defined_function:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addiw a0, a0, 1
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %1 = add i32 %a, 1
   ret i32 %1
 }
 
 define i32 @test_call_defined(i32 %a) nounwind {
-; CHECK-LABEL: test_call_defined:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    call defined_function
-; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32I-LABEL: test_call_defined:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    call defined_function
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_defined:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    call defined_function
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: test_call_defined:
 ; RV64I:       # %bb.0:
@@ -197,21 +263,45 @@ define i32 @test_call_defined(i32 %a) nounwind {
 ; RV64I-LARGE-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LARGE-NEXT:    addi sp, sp, 16
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: test_call_defined:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; RV64I-LARGE-ZICFILP-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:  .Lpcrel_hi2:
+; RV64I-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI3_0)
+; RV64I-LARGE-ZICFILP-NEXT:    ld t2, %pcrel_lo(.Lpcrel_hi2)(a1)
+; RV64I-LARGE-ZICFILP-NEXT:    jalr t2
+; RV64I-LARGE-ZICFILP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %1 = call i32 @defined_function(i32 %a)
   ret i32 %1
 }
 
 define i32 @test_call_indirect(ptr %a, i32 %b) nounwind {
-; CHECK-LABEL: test_call_indirect:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    mv a2, a0
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    jalr a2
-; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32I-LABEL: test_call_indirect:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    jalr a2
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_indirect:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    mv a2, a0
+; RV32I-PIC-NEXT:    mv a0, a1
+; RV32I-PIC-NEXT:    jalr a2
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: test_call_indirect:
 ; RV64I:       # %bb.0:
@@ -256,6 +346,18 @@ define i32 @test_call_indirect(ptr %a, i32 %b) nounwind {
 ; RV64I-LARGE-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LARGE-NEXT:    addi sp, sp, 16
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: test_call_indirect:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; RV64I-LARGE-ZICFILP-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:    mv a2, a0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a0, a1
+; RV64I-LARGE-ZICFILP-NEXT:    jalr a2
+; RV64I-LARGE-ZICFILP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %1 = call i32 %a(i32 %b)
   ret i32 %1
 }
@@ -263,22 +365,39 @@ define i32 @test_call_indirect(ptr %a, i32 %b) nounwind {
 ; Make sure we don't use t0 as the source for jalr as that is a hint to pop the
 ; return address stack on some microarchitectures.
 define i32 @test_call_indirect_no_t0(ptr %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) nounwind {
-; CHECK-LABEL: test_call_indirect_no_t0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    mv t1, a0
-; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    mv a1, a2
-; CHECK-NEXT:    mv a2, a3
-; CHECK-NEXT:    mv a3, a4
-; CHECK-NEXT:    mv a4, a5
-; CHECK-NEXT:    mv a5, a6
-; CHECK-NEXT:    mv a6, a7
-; CHECK-NEXT:    jalr t1
-; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32I-LABEL: test_call_indirect_no_t0:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv t1, a0
+; RV32I-NEXT:    mv a0, a1
+; RV32I-NEXT:    mv a1, a2
+; RV32I-NEXT:    mv a2, a3
+; RV32I-NEXT:    mv a3, a4
+; RV32I-NEXT:    mv a4, a5
+; RV32I-NEXT:    mv a5, a6
+; RV32I-NEXT:    mv a6, a7
+; RV32I-NEXT:    jalr t1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_indirect_no_t0:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    mv t1, a0
+; RV32I-PIC-NEXT:    mv a0, a1
+; RV32I-PIC-NEXT:    mv a1, a2
+; RV32I-PIC-NEXT:    mv a2, a3
+; RV32I-PIC-NEXT:    mv a3, a4
+; RV32I-PIC-NEXT:    mv a4, a5
+; RV32I-PIC-NEXT:    mv a5, a6
+; RV32I-PIC-NEXT:    mv a6, a7
+; RV32I-PIC-NEXT:    jalr t1
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: test_call_indirect_no_t0:
 ; RV64I:       # %bb.0:
@@ -347,6 +466,24 @@ define i32 @test_call_indirect_no_t0(ptr %a, i32 %b, i32 %c, i32 %d, i32 %e, i32
 ; RV64I-LARGE-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
 ; RV64I-LARGE-NEXT:    addi sp, sp, 16
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: test_call_indirect_no_t0:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; RV64I-LARGE-ZICFILP-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:    mv t1, a0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a0, a1
+; RV64I-LARGE-ZICFILP-NEXT:    mv a1, a2
+; RV64I-LARGE-ZICFILP-NEXT:    mv a2, a3
+; RV64I-LARGE-ZICFILP-NEXT:    mv a3, a4
+; RV64I-LARGE-ZICFILP-NEXT:    mv a4, a5
+; RV64I-LARGE-ZICFILP-NEXT:    mv a5, a6
+; RV64I-LARGE-ZICFILP-NEXT:    mv a6, a7
+; RV64I-LARGE-ZICFILP-NEXT:    jalr t1
+; RV64I-LARGE-ZICFILP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %1 = call i32 %a(i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
   ret i32 %1
 }
@@ -355,10 +492,15 @@ define i32 @test_call_indirect_no_t0(ptr %a, i32 %b, i32 %c, i32 %d, i32 %e, i32
 ; introduced when compiling with optimisation.
 
 define fastcc i32 @fastcc_function(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: fastcc_function:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    add a0, a0, a1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: fastcc_function:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    add a0, a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: fastcc_function:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    add a0, a0, a1
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: fastcc_function:
 ; RV64I:       # %bb.0:
@@ -379,23 +521,42 @@ define fastcc i32 @fastcc_function(i32 %a, i32 %b) nounwind {
 ; RV64I-LARGE:       # %bb.0:
 ; RV64I-LARGE-NEXT:    addw a0, a0, a1
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: fastcc_function:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addw a0, a0, a1
+; RV64I-LARGE-ZICFILP-NEXT:    ret
  %1 = add i32 %a, %b
  ret i32 %1
 }
 
 define i32 @test_call_fastcc(i32 %a, i32 %b) nounwind {
-; CHECK-LABEL: test_call_fastcc:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    call fastcc_function
-; CHECK-NEXT:    mv a0, s0
-; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32I-LABEL: test_call_fastcc:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    call fastcc_function
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_fastcc:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    mv s0, a0
+; RV32I-PIC-NEXT:    call fastcc_function
+; RV32I-PIC-NEXT:    mv a0, s0
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: test_call_fastcc:
 ; RV64I:       # %bb.0:
@@ -452,6 +613,24 @@ define i32 @test_call_fastcc(i32 %a, i32 %b) nounwind {
 ; RV64I-LARGE-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
 ; RV64I-LARGE-NEXT:    addi sp, sp, 16
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: test_call_fastcc:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; RV64I-LARGE-ZICFILP-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:    mv s0, a0
+; RV64I-LARGE-ZICFILP-NEXT:  .Lpcrel_hi3:
+; RV64I-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI7_0)
+; RV64I-LARGE-ZICFILP-NEXT:    ld t2, %pcrel_lo(.Lpcrel_hi3)(a0)
+; RV64I-LARGE-ZICFILP-NEXT:    mv a0, s0
+; RV64I-LARGE-ZICFILP-NEXT:    jalr t2
+; RV64I-LARGE-ZICFILP-NEXT:    mv a0, s0
+; RV64I-LARGE-ZICFILP-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %1 = call fastcc i32 @fastcc_function(i32 %a, i32 %b)
   ret i32 %a
 }
@@ -459,27 +638,49 @@ define i32 @test_call_fastcc(i32 %a, i32 %b) nounwind {
 declare i32 @external_many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) nounwind
 
 define i32 @test_call_external_many_args(i32 %a) nounwind {
-; CHECK-LABEL: test_call_external_many_args:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    mv s0, a0
-; CHECK-NEXT:    sw a0, 4(sp)
-; CHECK-NEXT:    sw a0, 0(sp)
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    mv a2, a0
-; CHECK-NEXT:    mv a3, a0
-; CHECK-NEXT:    mv a4, a0
-; CHECK-NEXT:    mv a5, a0
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a0
-; CHECK-NEXT:    call external_many_args
-; CHECK-NEXT:    mv a0, s0
-; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32I-LABEL: test_call_external_many_args:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    mv s0, a0
+; RV32I-NEXT:    sw a0, 4(sp)
+; RV32I-NEXT:    sw a0, 0(sp)
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    mv a5, a0
+; RV32I-NEXT:    mv a6, a0
+; RV32I-NEXT:    mv a7, a0
+; RV32I-NEXT:    call external_many_args
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_external_many_args:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    mv s0, a0
+; RV32I-PIC-NEXT:    sw a0, 4(sp)
+; RV32I-PIC-NEXT:    sw a0, 0(sp)
+; RV32I-PIC-NEXT:    mv a1, a0
+; RV32I-PIC-NEXT:    mv a2, a0
+; RV32I-PIC-NEXT:    mv a3, a0
+; RV32I-PIC-NEXT:    mv a4, a0
+; RV32I-PIC-NEXT:    mv a5, a0
+; RV32I-PIC-NEXT:    mv a6, a0
+; RV32I-PIC-NEXT:    mv a7, a0
+; RV32I-PIC-NEXT:    call external_many_args
+; RV32I-PIC-NEXT:    mv a0, s0
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: test_call_external_many_args:
 ; RV64I:       # %bb.0:
@@ -572,17 +773,50 @@ define i32 @test_call_external_many_args(i32 %a) nounwind {
 ; RV64I-LARGE-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; RV64I-LARGE-NEXT:    addi sp, sp, 32
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: test_call_external_many_args:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, -32
+; RV64I-LARGE-ZICFILP-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:    mv s0, a0
+; RV64I-LARGE-ZICFILP-NEXT:  .Lpcrel_hi4:
+; RV64I-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI8_0)
+; RV64I-LARGE-ZICFILP-NEXT:    ld t2, %pcrel_lo(.Lpcrel_hi4)(a0)
+; RV64I-LARGE-ZICFILP-NEXT:    sd s0, 8(sp)
+; RV64I-LARGE-ZICFILP-NEXT:    sd s0, 0(sp)
+; RV64I-LARGE-ZICFILP-NEXT:    mv a0, s0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a1, s0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a2, s0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a3, s0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a4, s0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a5, s0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a6, s0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a7, s0
+; RV64I-LARGE-ZICFILP-NEXT:    jalr t2
+; RV64I-LARGE-ZICFILP-NEXT:    mv a0, s0
+; RV64I-LARGE-ZICFILP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, 32
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %1 = call i32 @external_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a,
                                     i32 %a, i32 %a, i32 %a, i32 %a, i32 %a)
   ret i32 %a
 }
 
 define i32 @defined_many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 %j) nounwind {
-; CHECK-LABEL: defined_many_args:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lw a0, 4(sp)
-; CHECK-NEXT:    addi a0, a0, 1
-; CHECK-NEXT:    ret
+; RV32I-LABEL: defined_many_args:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    lw a0, 4(sp)
+; RV32I-NEXT:    addi a0, a0, 1
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: defined_many_args:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    lw a0, 4(sp)
+; RV32I-PIC-NEXT:    addi a0, a0, 1
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: defined_many_args:
 ; RV64I:       # %bb.0:
@@ -607,28 +841,53 @@ define i32 @defined_many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 %
 ; RV64I-LARGE-NEXT:    lw a0, 8(sp)
 ; RV64I-LARGE-NEXT:    addiw a0, a0, 1
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: defined_many_args:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    lw a0, 8(sp)
+; RV64I-LARGE-ZICFILP-NEXT:    addiw a0, a0, 1
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %added = add i32 %j, 1
   ret i32 %added
 }
 
 define i32 @test_call_defined_many_args(i32 %a) nounwind {
-; CHECK-LABEL: test_call_defined_many_args:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    addi sp, sp, -16
-; CHECK-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT:    sw a0, 4(sp)
-; CHECK-NEXT:    sw a0, 0(sp)
-; CHECK-NEXT:    mv a1, a0
-; CHECK-NEXT:    mv a2, a0
-; CHECK-NEXT:    mv a3, a0
-; CHECK-NEXT:    mv a4, a0
-; CHECK-NEXT:    mv a5, a0
-; CHECK-NEXT:    mv a6, a0
-; CHECK-NEXT:    mv a7, a0
-; CHECK-NEXT:    call defined_many_args
-; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT:    addi sp, sp, 16
-; CHECK-NEXT:    ret
+; RV32I-LABEL: test_call_defined_many_args:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw a0, 4(sp)
+; RV32I-NEXT:    sw a0, 0(sp)
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:    mv a2, a0
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    mv a4, a0
+; RV32I-NEXT:    mv a5, a0
+; RV32I-NEXT:    mv a6, a0
+; RV32I-NEXT:    mv a7, a0
+; RV32I-NEXT:    call defined_many_args
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV32I-PIC-LABEL: test_call_defined_many_args:
+; RV32I-PIC:       # %bb.0:
+; RV32I-PIC-NEXT:    addi sp, sp, -16
+; RV32I-PIC-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-PIC-NEXT:    sw a0, 4(sp)
+; RV32I-PIC-NEXT:    sw a0, 0(sp)
+; RV32I-PIC-NEXT:    mv a1, a0
+; RV32I-PIC-NEXT:    mv a2, a0
+; RV32I-PIC-NEXT:    mv a3, a0
+; RV32I-PIC-NEXT:    mv a4, a0
+; RV32I-PIC-NEXT:    mv a5, a0
+; RV32I-PIC-NEXT:    mv a6, a0
+; RV32I-PIC-NEXT:    mv a7, a0
+; RV32I-PIC-NEXT:    call defined_many_args
+; RV32I-PIC-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-PIC-NEXT:    addi sp, sp, 16
+; RV32I-PIC-NEXT:    ret
 ;
 ; RV64I-LABEL: test_call_defined_many_args:
 ; RV64I:       # %bb.0:
@@ -704,10 +963,31 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind {
 ; RV64I-LARGE-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
 ; RV64I-LARGE-NEXT:    addi sp, sp, 32
 ; RV64I-LARGE-NEXT:    ret
+;
+; RV64I-LARGE-ZICFILP-LABEL: test_call_defined_many_args:
+; RV64I-LARGE-ZICFILP:       # %bb.0:
+; RV64I-LARGE-ZICFILP-NEXT:    lpad 0
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, -32
+; RV64I-LARGE-ZICFILP-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-LARGE-ZICFILP-NEXT:  .Lpcrel_hi5:
+; RV64I-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI10_0)
+; RV64I-LARGE-ZICFILP-NEXT:    ld t2, %pcrel_lo(.Lpcrel_hi5)(a1)
+; RV64I-LARGE-ZICFILP-NEXT:    sd a0, 8(sp)
+; RV64I-LARGE-ZICFILP-NEXT:    sd a0, 0(sp)
+; RV64I-LARGE-ZICFILP-NEXT:    mv a1, a0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a2, a0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a3, a0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a4, a0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a5, a0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a6, a0
+; RV64I-LARGE-ZICFILP-NEXT:    mv a7, a0
+; RV64I-LARGE-ZICFILP-NEXT:    jalr t2
+; RV64I-LARGE-ZICFILP-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-LARGE-ZICFILP-NEXT:    addi sp, sp, 32
+; RV64I-LARGE-ZICFILP-NEXT:    ret
   %1 = call i32 @defined_many_args(i32 %a, i32 %a, i32 %a, i32 %a, i32 %a,
                                    i32 %a, i32 %a, i32 %a, i32 %a, i32 %a)
   ret i32 %1
 }
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32I: {{.*}}
-; RV32I-PIC: {{.*}}
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll
index d3e495bb723ad8..4dd6ed68ff9811 100644
--- a/llvm/test/CodeGen/RISCV/tail-calls.ll
+++ b/llvm/test/CodeGen/RISCV/tail-calls.ll
@@ -1,5 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple riscv32-unknown-linux-gnu -o - %s | FileCheck %s
+; RUN: llc -mtriple riscv32-unknown-linux-gnu -mattr=experimental-zicfilp \
+; RUN:   -code-model=large -o - %s \
+; RUN:   | FileCheck %s -check-prefix=CHECK-LARGE-ZICFILP
 ; RUN: llc -mtriple riscv32-unknown-elf       -o - %s | FileCheck %s
 
 ; Perform tail call optimization for global address.
@@ -8,6 +11,14 @@ define i32 @caller_tail(i32 %i) nounwind {
 ; CHECK-LABEL: caller_tail:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    tail callee_tail
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_tail:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi0:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI0_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi0)(a1)
+; CHECK-LARGE-ZICFILP-NEXT:    jr t2
 entry:
   %r = tail call i32 @callee_tail(i32 %i)
   ret i32 %r
@@ -26,6 +37,21 @@ define void @caller_extern(ptr %src) optsize {
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    mv a1, a3
 ; CHECK-NEXT:    tail memcpy
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_extern:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi1:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI1_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi1)(a1)
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi2:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a2, %pcrel_hi(.LCPI1_1)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi2)(a2)
+; CHECK-LARGE-ZICFILP-NEXT:    li a2, 7
+; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a0
+; CHECK-LARGE-ZICFILP-NEXT:    mv a0, a1
+; CHECK-LARGE-ZICFILP-NEXT:    mv a1, a3
+; CHECK-LARGE-ZICFILP-NEXT:    jr t2
 entry:
   tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 7, i1 false)
   ret void
@@ -43,6 +69,21 @@ define void @caller_extern_pgso(ptr %src) !prof !14 {
 ; CHECK-NEXT:    mv a0, a1
 ; CHECK-NEXT:    mv a1, a3
 ; CHECK-NEXT:    tail memcpy
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_extern_pgso:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi3:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI2_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw a1, %pcrel_lo(.Lpcrel_hi3)(a1)
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi4:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a2, %pcrel_hi(.LCPI2_1)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi4)(a2)
+; CHECK-LARGE-ZICFILP-NEXT:    li a2, 7
+; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a0
+; CHECK-LARGE-ZICFILP-NEXT:    mv a0, a1
+; CHECK-LARGE-ZICFILP-NEXT:    mv a1, a3
+; CHECK-LARGE-ZICFILP-NEXT:    jr t2
 entry:
   tail call void @llvm.memcpy.p0.p0.i32(ptr @dest_pgso, ptr %src, i32 7, i1 false)
   ret void
@@ -63,8 +104,21 @@ define void @caller_indirect_tail(i32 %a) nounwind {
 ; CHECK-NEXT:    lui t1, %hi(callee_indirect1)
 ; CHECK-NEXT:    addi t1, t1, %lo(callee_indirect1)
 ; CHECK-NEXT:    jr t1
-
-
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_tail:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    beqz a0, .LBB3_2
+; CHECK-LARGE-ZICFILP-NEXT:  # %bb.1: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi6:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI3_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t1, %pcrel_lo(.Lpcrel_hi6)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    jr t1
+; CHECK-LARGE-ZICFILP-NEXT:  .LBB3_2:
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi5:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI3_1)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t1, %pcrel_lo(.Lpcrel_hi5)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    jr t1
 entry:
   %tobool = icmp eq i32 %a, 0
   %callee = select i1 %tobool, ptr @callee_indirect1, ptr @callee_indirect2
@@ -86,6 +140,19 @@ define i32 @caller_indirect_no_t0(ptr %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5
 ; CHECK-NEXT:    mv a5, a6
 ; CHECK-NEXT:    mv a6, a7
 ; CHECK-NEXT:    jr t1
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_no_t0:
+; CHECK-LARGE-ZICFILP:       # %bb.0:
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    mv t1, a0
+; CHECK-LARGE-ZICFILP-NEXT:    mv a0, a1
+; CHECK-LARGE-ZICFILP-NEXT:    mv a1, a2
+; CHECK-LARGE-ZICFILP-NEXT:    mv a2, a3
+; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a4
+; CHECK-LARGE-ZICFILP-NEXT:    mv a4, a5
+; CHECK-LARGE-ZICFILP-NEXT:    mv a5, a6
+; CHECK-LARGE-ZICFILP-NEXT:    mv a6, a7
+; CHECK-LARGE-ZICFILP-NEXT:    jr t1
   %9 = tail call i32 %0(i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7)
   ret i32 %9
 }
@@ -108,6 +175,26 @@ define void @caller_varargs(i32 %a, i32 %b) nounwind {
 ; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_varargs:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi7:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a2, %pcrel_hi(.LCPI5_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi7)(a2)
+; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 0(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    mv a2, a1
+; CHECK-LARGE-ZICFILP-NEXT:    mv a3, a0
+; CHECK-LARGE-ZICFILP-NEXT:    mv a4, a0
+; CHECK-LARGE-ZICFILP-NEXT:    mv a5, a1
+; CHECK-LARGE-ZICFILP-NEXT:    mv a6, a1
+; CHECK-LARGE-ZICFILP-NEXT:    mv a7, a0
+; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
+; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; CHECK-LARGE-ZICFILP-NEXT:    ret
 entry:
   %call = tail call i32 (i32, ...) @callee_varargs(i32 %a, i32 %b, i32 %b, i32 %a, i32 %a, i32 %b, i32 %b, i32 %a, i32 %a)
   ret void
@@ -136,6 +223,31 @@ define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g
 ; CHECK-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_args:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -32
+; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    lw t0, 32(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t1, 36(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t3, 40(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t4, 44(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, 48(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t5, 52(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    sw t5, 20(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    sw t2, 16(sp)
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi8:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc t2, %pcrel_hi(.LCPI6_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi8)(t2)
+; CHECK-LARGE-ZICFILP-NEXT:    sw t4, 12(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    sw t3, 8(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    sw t1, 4(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    sw t0, 0(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
+; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 32
+; CHECK-LARGE-ZICFILP-NEXT:    ret
 entry:
   %r = tail call i32 @callee_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n)
   ret i32 %r
@@ -158,6 +270,25 @@ define void @caller_indirect_args() nounwind {
 ; CHECK-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 32
 ; CHECK-NEXT:    ret
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_indirect_args:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -32
+; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    lui a0, 262128
+; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 12(sp)
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi9:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI7_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi9)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    sw zero, 8(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    sw zero, 4(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    mv a0, sp
+; CHECK-LARGE-ZICFILP-NEXT:    sw zero, 0(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
+; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 32
+; CHECK-LARGE-ZICFILP-NEXT:    ret
 entry:
   %call = tail call i32 @callee_indirect_args(fp128 0xL00000000000000003FFF000000000000)
   ret void
@@ -169,6 +300,14 @@ define void @caller_weak() nounwind {
 ; CHECK-LABEL: caller_weak:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    tail callee_weak
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_weak:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi10:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI8_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi10)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    jr t2
 entry:
   tail call void @callee_weak()
   ret void
@@ -217,6 +356,48 @@ define void @caller_irq() nounwind "interrupt"="machine" {
 ; CHECK-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 64
 ; CHECK-NEXT:    mret
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_irq:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -64
+; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 60(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw t0, 56(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw t1, 52(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw t2, 48(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 44(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw a1, 40(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw a2, 36(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw a3, 32(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw a4, 28(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw a5, 24(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw a6, 20(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw a7, 16(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw t3, 12(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw t4, 8(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw t5, 4(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    sw t6, 0(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi11:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI9_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi11)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
+; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 60(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw t0, 56(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw t1, 52(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, 48(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw a0, 44(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw a1, 40(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw a2, 36(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw a3, 32(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw a4, 28(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw a5, 24(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw a6, 20(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw a7, 16(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw t3, 12(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw t4, 8(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw t5, 4(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    lw t6, 0(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 64
+; CHECK-LARGE-ZICFILP-NEXT:    mret
 entry:
   tail call void @callee_irq()
   ret void
@@ -238,6 +419,22 @@ define i32 @caller_byval() nounwind {
 ; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_byval:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:    lw a0, 8(sp)
+; CHECK-LARGE-ZICFILP-NEXT:    sw a0, 4(sp)
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi12:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI10_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi12)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    addi a0, sp, 4
+; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
+; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; CHECK-LARGE-ZICFILP-NEXT:    ret
 entry:
   %a = alloca ptr
   %r = tail call i32 @callee_byval(ptr byval(ptr) %a)
@@ -260,6 +457,22 @@ define void @caller_nostruct() nounwind {
 ; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_nostruct:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi13:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI11_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw a0, %pcrel_lo(.Lpcrel_hi13)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi14:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI11_1)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi14)(a1)
+; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
+; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; CHECK-LARGE-ZICFILP-NEXT:    ret
 entry:
   tail call void @callee_struct(ptr sret(%struct.A) @a)
   ret void
@@ -276,6 +489,19 @@ define void @caller_struct(ptr sret(%struct.A) %a) nounwind {
 ; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
+;
+; CHECK-LARGE-ZICFILP-LABEL: caller_struct:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi15:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI12_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi15)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
+; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; CHECK-LARGE-ZICFILP-NEXT:    ret
 entry:
   tail call void @callee_nostruct()
   ret void
@@ -291,6 +517,19 @@ define i32 @disable_tail_calls(i32 %i) nounwind "disable-tail-calls"="true" {
 ; CHECK-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
 ; CHECK-NEXT:    addi sp, sp, 16
 ; CHECK-NEXT:    ret
+;
+; CHECK-LARGE-ZICFILP-LABEL: disable_tail_calls:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, -16
+; CHECK-LARGE-ZICFILP-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi16:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a1, %pcrel_hi(.LCPI13_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi16)(a1)
+; CHECK-LARGE-ZICFILP-NEXT:    jalr t2
+; CHECK-LARGE-ZICFILP-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; CHECK-LARGE-ZICFILP-NEXT:    addi sp, sp, 16
+; CHECK-LARGE-ZICFILP-NEXT:    ret
 entry:
   %rv = tail call i32 @callee_tail(i32 %i)
   ret i32 %rv
@@ -317,6 +556,35 @@ define i32 @duplicate_returns(i32 %a, i32 %b) nounwind {
 ; CHECK-NEXT:    tail test1
 ; CHECK-NEXT:  .LBB14_6: # %if.else8
 ; CHECK-NEXT:    tail test3
+;
+; CHECK-LARGE-ZICFILP-LABEL: duplicate_returns:
+; CHECK-LARGE-ZICFILP:       # %bb.0: # %entry
+; CHECK-LARGE-ZICFILP-NEXT:    lpad 0
+; CHECK-LARGE-ZICFILP-NEXT:    beqz a0, .LBB14_4
+; CHECK-LARGE-ZICFILP-NEXT:  # %bb.1: # %if.else
+; CHECK-LARGE-ZICFILP-NEXT:    beqz a1, .LBB14_5
+; CHECK-LARGE-ZICFILP-NEXT:  # %bb.2: # %if.else4
+; CHECK-LARGE-ZICFILP-NEXT:    bge a1, a0, .LBB14_6
+; CHECK-LARGE-ZICFILP-NEXT:  # %bb.3: # %if.then6
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi19:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_1)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi19)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    jr t2
+; CHECK-LARGE-ZICFILP-NEXT:  .LBB14_4: # %if.then
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi17:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_3)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi17)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    jr t2
+; CHECK-LARGE-ZICFILP-NEXT:  .LBB14_5: # %if.then2
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi18:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_2)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi18)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    jr t2
+; CHECK-LARGE-ZICFILP-NEXT:  .LBB14_6: # %if.else8
+; CHECK-LARGE-ZICFILP-NEXT:  .Lpcrel_hi20:
+; CHECK-LARGE-ZICFILP-NEXT:    auipc a0, %pcrel_hi(.LCPI14_0)
+; CHECK-LARGE-ZICFILP-NEXT:    lw t2, %pcrel_lo(.Lpcrel_hi20)(a0)
+; CHECK-LARGE-ZICFILP-NEXT:    jr t2
 entry:
   %cmp = icmp eq i32 %a, 0
   br i1 %cmp, label %if.then, label %if.else



More information about the llvm-commits mailing list