[llvm] [RISCV] Don't use x7 as an input argument for fastcc when Zicfilp is enabled. (PR #93321)

Yeting Kuo via llvm-commits llvm-commits at lists.llvm.org
Sun May 26 23:22:48 PDT 2024


https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/93321

>From 2444deaadfab98390ffb812618be25b1171b4156 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Fri, 24 May 2024 09:39:04 -0700
Subject: [PATCH 1/2] [RISCV] Don't use t2 (x7) as an input argument for fastcc
 when Zicfilp is enabled.

Zicfilp needs x7 as the landing pad label register.
https://github.com/riscv/riscv-cfi/blob/main/src/cfi_forward.adoc
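
Roughly, the labeled indirect-call sequence from that spec looks like the
sketch below (the label value, call-target register, and symbol name are
illustrative only, not taken from this patch):

    # Caller: materialize the expected landing-pad label in t2 (x7),
    # then make the tracked indirect call. Any fastcc argument placed
    # in t2 would be clobbered by this sequence.
    lui   t2, 0x12345      # t2/x7 carries the landing-pad label
    jalr  ra, 0(a6)        # indirect call to the target held in a6

    # Callee: must begin with lpad; execution faults if the label in
    # t2 does not match the lpad immediate.
target:
    lpad  0x12345

So t2 must stay reserved for the label and cannot carry a fastcc argument.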
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 27 +++++++++++++++------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f0e5a7d393b6c..382bc3ce3cdf6 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -18402,7 +18402,8 @@ ArrayRef<MCPhysReg> RISCV::getArgGPRs(const RISCVABI::ABI ABI) {
   return ArrayRef(ArgIGPRs);
 }
 
-static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
+static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI,
+                                            bool HasZicfilp) {
   // The GPRs used for passing arguments in the FastCC, X5 and X6 might be used
   // for save-restore libcall, so we don't use them.
   static const MCPhysReg FastCCIGPRs[] = {
@@ -18415,10 +18416,18 @@ static ArrayRef<MCPhysReg> getFastCCArgGPRs(const RISCVABI::ABI ABI) {
                                           RISCV::X13, RISCV::X14, RISCV::X15,
                                           RISCV::X7};
 
+  // Zicfilp needs x7 (t2) as the landing pad label register.
+  static const MCPhysReg FastCCIGPRsNonX7[] = {
+      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15,
+      RISCV::X16, RISCV::X17, RISCV::X28, RISCV::X29, RISCV::X30, RISCV::X31};
+
+  static const MCPhysReg FastCCEGPRsNonX7[] = {
+      RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14, RISCV::X15};
+
   if (ABI == RISCVABI::ABI_ILP32E || ABI == RISCVABI::ABI_LP64E)
-    return ArrayRef(FastCCEGPRs);
+    return HasZicfilp ? ArrayRef(FastCCEGPRsNonX7) : ArrayRef(FastCCEGPRs);
 
-  return ArrayRef(FastCCIGPRs);
+  return HasZicfilp ? ArrayRef(FastCCIGPRsNonX7) : ArrayRef(FastCCIGPRs);
 }
 
 // Pass a 2*XLEN argument that has been split into two XLEN values through
@@ -18962,15 +18971,16 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
                             bool IsFixed, bool IsRet, Type *OrigTy,
                             const RISCVTargetLowering &TLI,
                             RVVArgDispatcher &RVVDispatcher) {
+  const RISCVSubtarget &Subtarget = TLI.getSubtarget();
+  bool HasZicfilp = Subtarget.hasStdExtZicfilp();
+
   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
-    if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
+    if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI, HasZicfilp))) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
   }
 
-  const RISCVSubtarget &Subtarget = TLI.getSubtarget();
-
   if (LocVT == MVT::f16 &&
       (Subtarget.hasStdExtZfh() || Subtarget.hasStdExtZfhmin())) {
     static const MCPhysReg FPR16List[] = {
@@ -19014,7 +19024,7 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
       (LocVT == MVT::f32 && Subtarget.hasStdExtZfinx()) ||
       (LocVT == MVT::f64 && Subtarget.is64Bit() &&
        Subtarget.hasStdExtZdinx())) {
-    if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
+    if (unsigned Reg = State.AllocateReg(getFastCCArgGPRs(ABI, HasZicfilp))) {
       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
       return false;
     }
@@ -19049,7 +19059,8 @@ bool RISCV::CC_RISCV_FastCC(const DataLayout &DL, RISCVABI::ABI ABI,
           CCValAssign::getReg(ValNo, ValVT, AllocatedVReg, LocVT, LocInfo));
     } else {
       // Try and pass the address via a "fast" GPR.
-      if (unsigned GPRReg = State.AllocateReg(getFastCCArgGPRs(ABI))) {
+      if (unsigned GPRReg =
+              State.AllocateReg(getFastCCArgGPRs(ABI, HasZicfilp))) {
         LocInfo = CCValAssign::Indirect;
         LocVT = TLI.getSubtarget().getXLenVT();
         State.addLoc(CCValAssign::getReg(ValNo, ValVT, GPRReg, LocVT, LocInfo));

>From 6be881a6abdd9d7b3e3e4c3c8b0ad004becaa7ef Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Sun, 26 May 2024 23:22:13 -0700
Subject: [PATCH 2/2] Add testcase.

---
 llvm/test/CodeGen/RISCV/fastcc-int.ll | 192 +++++++++++++++++---------
 1 file changed, 130 insertions(+), 62 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/fastcc-int.ll b/llvm/test/CodeGen/RISCV/fastcc-int.ll
index e4c41a1aa890f..c3e0c9222a517 100644
--- a/llvm/test/CodeGen/RISCV/fastcc-int.ll
+++ b/llvm/test/CodeGen/RISCV/fastcc-int.ll
@@ -1,8 +1,12 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV32 %s
+; RUN:   | FileCheck -check-prefixes=RV32,RV32-NOCFILP %s
 ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
-; RUN:   | FileCheck -check-prefix=RV64 %s
+; RUN:   | FileCheck -check-prefixes=RV64,RV64-NOCFILP %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-zicfilp -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV32,RV32-CFILP %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-zicfilp -verify-machineinstrs < %s \
+; RUN:   | FileCheck -check-prefixes=RV64,RV64-CFILP %s
 
 define fastcc i32 @callee(<16 x i32> %A) nounwind {
 ; RV32-LABEL: callee:
@@ -19,67 +23,131 @@ define fastcc i32 @callee(<16 x i32> %A) nounwind {
 ; With the fastcc, arguments will be passed by a0-a7 and t2-t6.
 ; The rest will be pushed on the stack.
 define i32 @caller(<16 x i32> %A) nounwind {
-; RV32-LABEL: caller:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
-; RV32-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
-; RV32-NEXT:    lw t0, 0(a0)
-; RV32-NEXT:    lw a1, 4(a0)
-; RV32-NEXT:    lw a2, 8(a0)
-; RV32-NEXT:    lw a3, 12(a0)
-; RV32-NEXT:    lw a4, 16(a0)
-; RV32-NEXT:    lw a5, 20(a0)
-; RV32-NEXT:    lw a6, 24(a0)
-; RV32-NEXT:    lw a7, 28(a0)
-; RV32-NEXT:    lw t2, 32(a0)
-; RV32-NEXT:    lw t3, 36(a0)
-; RV32-NEXT:    lw t4, 40(a0)
-; RV32-NEXT:    lw t5, 44(a0)
-; RV32-NEXT:    lw t6, 48(a0)
-; RV32-NEXT:    lw t1, 52(a0)
-; RV32-NEXT:    lw s0, 56(a0)
-; RV32-NEXT:    lw a0, 60(a0)
-; RV32-NEXT:    sw a0, 8(sp)
-; RV32-NEXT:    sw s0, 4(sp)
-; RV32-NEXT:    sw t1, 0(sp)
-; RV32-NEXT:    mv a0, t0
-; RV32-NEXT:    call callee
-; RV32-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
-; RV32-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
-; RV32-NEXT:    addi sp, sp, 32
-; RV32-NEXT:    ret
+; RV32-NOCFILP-LABEL: caller:
+; RV32-NOCFILP:       # %bb.0:
+; RV32-NOCFILP-NEXT:    addi sp, sp, -32
+; RV32-NOCFILP-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-NOCFILP-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-NOCFILP-NEXT:    lw t0, 0(a0)
+; RV32-NOCFILP-NEXT:    lw a1, 4(a0)
+; RV32-NOCFILP-NEXT:    lw a2, 8(a0)
+; RV32-NOCFILP-NEXT:    lw a3, 12(a0)
+; RV32-NOCFILP-NEXT:    lw a4, 16(a0)
+; RV32-NOCFILP-NEXT:    lw a5, 20(a0)
+; RV32-NOCFILP-NEXT:    lw a6, 24(a0)
+; RV32-NOCFILP-NEXT:    lw a7, 28(a0)
+; RV32-NOCFILP-NEXT:    lw t2, 32(a0)
+; RV32-NOCFILP-NEXT:    lw t3, 36(a0)
+; RV32-NOCFILP-NEXT:    lw t4, 40(a0)
+; RV32-NOCFILP-NEXT:    lw t5, 44(a0)
+; RV32-NOCFILP-NEXT:    lw t6, 48(a0)
+; RV32-NOCFILP-NEXT:    lw t1, 52(a0)
+; RV32-NOCFILP-NEXT:    lw s0, 56(a0)
+; RV32-NOCFILP-NEXT:    lw a0, 60(a0)
+; RV32-NOCFILP-NEXT:    sw a0, 8(sp)
+; RV32-NOCFILP-NEXT:    sw s0, 4(sp)
+; RV32-NOCFILP-NEXT:    sw t1, 0(sp)
+; RV32-NOCFILP-NEXT:    mv a0, t0
+; RV32-NOCFILP-NEXT:    call callee
+; RV32-NOCFILP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-NOCFILP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-NOCFILP-NEXT:    addi sp, sp, 32
+; RV32-NOCFILP-NEXT:    ret
 ;
-; RV64-LABEL: caller:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addi sp, sp, -48
-; RV64-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
-; RV64-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
-; RV64-NEXT:    ld t0, 0(a0)
-; RV64-NEXT:    ld a1, 8(a0)
-; RV64-NEXT:    ld a2, 16(a0)
-; RV64-NEXT:    ld a3, 24(a0)
-; RV64-NEXT:    ld a4, 32(a0)
-; RV64-NEXT:    ld a5, 40(a0)
-; RV64-NEXT:    ld a6, 48(a0)
-; RV64-NEXT:    ld a7, 56(a0)
-; RV64-NEXT:    ld t2, 64(a0)
-; RV64-NEXT:    ld t3, 72(a0)
-; RV64-NEXT:    ld t4, 80(a0)
-; RV64-NEXT:    ld t5, 88(a0)
-; RV64-NEXT:    ld t6, 96(a0)
-; RV64-NEXT:    ld t1, 104(a0)
-; RV64-NEXT:    ld s0, 112(a0)
-; RV64-NEXT:    ld a0, 120(a0)
-; RV64-NEXT:    sd a0, 16(sp)
-; RV64-NEXT:    sd s0, 8(sp)
-; RV64-NEXT:    sd t1, 0(sp)
-; RV64-NEXT:    mv a0, t0
-; RV64-NEXT:    call callee
-; RV64-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
-; RV64-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
-; RV64-NEXT:    addi sp, sp, 48
-; RV64-NEXT:    ret
+; RV64-NOCFILP-LABEL: caller:
+; RV64-NOCFILP:       # %bb.0:
+; RV64-NOCFILP-NEXT:    addi sp, sp, -48
+; RV64-NOCFILP-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-NOCFILP-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-NOCFILP-NEXT:    ld t0, 0(a0)
+; RV64-NOCFILP-NEXT:    ld a1, 8(a0)
+; RV64-NOCFILP-NEXT:    ld a2, 16(a0)
+; RV64-NOCFILP-NEXT:    ld a3, 24(a0)
+; RV64-NOCFILP-NEXT:    ld a4, 32(a0)
+; RV64-NOCFILP-NEXT:    ld a5, 40(a0)
+; RV64-NOCFILP-NEXT:    ld a6, 48(a0)
+; RV64-NOCFILP-NEXT:    ld a7, 56(a0)
+; RV64-NOCFILP-NEXT:    ld t2, 64(a0)
+; RV64-NOCFILP-NEXT:    ld t3, 72(a0)
+; RV64-NOCFILP-NEXT:    ld t4, 80(a0)
+; RV64-NOCFILP-NEXT:    ld t5, 88(a0)
+; RV64-NOCFILP-NEXT:    ld t6, 96(a0)
+; RV64-NOCFILP-NEXT:    ld t1, 104(a0)
+; RV64-NOCFILP-NEXT:    ld s0, 112(a0)
+; RV64-NOCFILP-NEXT:    ld a0, 120(a0)
+; RV64-NOCFILP-NEXT:    sd a0, 16(sp)
+; RV64-NOCFILP-NEXT:    sd s0, 8(sp)
+; RV64-NOCFILP-NEXT:    sd t1, 0(sp)
+; RV64-NOCFILP-NEXT:    mv a0, t0
+; RV64-NOCFILP-NEXT:    call callee
+; RV64-NOCFILP-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-NOCFILP-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-NOCFILP-NEXT:    addi sp, sp, 48
+; RV64-NOCFILP-NEXT:    ret
+;
+; RV32-CFILP-LABEL: caller:
+; RV32-CFILP:       # %bb.0:
+; RV32-CFILP-NEXT:    addi sp, sp, -32
+; RV32-CFILP-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
+; RV32-CFILP-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
+; RV32-CFILP-NEXT:    lw t0, 0(a0)
+; RV32-CFILP-NEXT:    lw a1, 4(a0)
+; RV32-CFILP-NEXT:    lw a2, 8(a0)
+; RV32-CFILP-NEXT:    lw a3, 12(a0)
+; RV32-CFILP-NEXT:    lw a4, 16(a0)
+; RV32-CFILP-NEXT:    lw a5, 20(a0)
+; RV32-CFILP-NEXT:    lw a6, 24(a0)
+; RV32-CFILP-NEXT:    lw a7, 28(a0)
+; RV32-CFILP-NEXT:    lw t3, 32(a0)
+; RV32-CFILP-NEXT:    lw t4, 36(a0)
+; RV32-CFILP-NEXT:    lw t5, 40(a0)
+; RV32-CFILP-NEXT:    lw t6, 44(a0)
+; RV32-CFILP-NEXT:    lw t1, 48(a0)
+; RV32-CFILP-NEXT:    lw t2, 52(a0)
+; RV32-CFILP-NEXT:    lw s0, 56(a0)
+; RV32-CFILP-NEXT:    lw a0, 60(a0)
+; RV32-CFILP-NEXT:    sw a0, 12(sp)
+; RV32-CFILP-NEXT:    sw s0, 8(sp)
+; RV32-CFILP-NEXT:    sw t2, 4(sp)
+; RV32-CFILP-NEXT:    sw t1, 0(sp)
+; RV32-CFILP-NEXT:    mv a0, t0
+; RV32-CFILP-NEXT:    call callee
+; RV32-CFILP-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
+; RV32-CFILP-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
+; RV32-CFILP-NEXT:    addi sp, sp, 32
+; RV32-CFILP-NEXT:    ret
+;
+; RV64-CFILP-LABEL: caller:
+; RV64-CFILP:       # %bb.0:
+; RV64-CFILP-NEXT:    addi sp, sp, -48
+; RV64-CFILP-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
+; RV64-CFILP-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
+; RV64-CFILP-NEXT:    ld t0, 0(a0)
+; RV64-CFILP-NEXT:    ld a1, 8(a0)
+; RV64-CFILP-NEXT:    ld a2, 16(a0)
+; RV64-CFILP-NEXT:    ld a3, 24(a0)
+; RV64-CFILP-NEXT:    ld a4, 32(a0)
+; RV64-CFILP-NEXT:    ld a5, 40(a0)
+; RV64-CFILP-NEXT:    ld a6, 48(a0)
+; RV64-CFILP-NEXT:    ld a7, 56(a0)
+; RV64-CFILP-NEXT:    ld t3, 64(a0)
+; RV64-CFILP-NEXT:    ld t4, 72(a0)
+; RV64-CFILP-NEXT:    ld t5, 80(a0)
+; RV64-CFILP-NEXT:    ld t6, 88(a0)
+; RV64-CFILP-NEXT:    ld t1, 96(a0)
+; RV64-CFILP-NEXT:    ld t2, 104(a0)
+; RV64-CFILP-NEXT:    ld s0, 112(a0)
+; RV64-CFILP-NEXT:    ld a0, 120(a0)
+; RV64-CFILP-NEXT:    sd a0, 24(sp)
+; RV64-CFILP-NEXT:    sd s0, 16(sp)
+; RV64-CFILP-NEXT:    sd t2, 8(sp)
+; RV64-CFILP-NEXT:    sd t1, 0(sp)
+; RV64-CFILP-NEXT:    mv a0, t0
+; RV64-CFILP-NEXT:    call callee
+; RV64-CFILP-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
+; RV64-CFILP-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
+; RV64-CFILP-NEXT:    addi sp, sp, 48
+; RV64-CFILP-NEXT:    ret
 	%C = call fastcc i32 @callee(<16 x i32> %A)
 	ret i32 %C
 }


