[llvm] [RISCV][CFI] Emit cfi_offset for every callee-saved vector registers (PR #100455)

Brandon Wu via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 24 21:53:59 PDT 2024


https://github.com/4vtomat updated https://github.com/llvm/llvm-project/pull/100455

>From 956ec64e15dcac29c1e0a080393108e449723318 Mon Sep 17 00:00:00 2001
From: Brandon Wu <brandon.wu at sifive.com>
Date: Wed, 24 Jul 2024 12:34:13 -0700
Subject: [PATCH 1/2] [RISCV][CFI] Emit cfi_offset for every callee-saved
 vector registers

A grouped vector register is modeled as a single register, e.g. V2M2,
which actually covers V2 and V3. We need to decompose each grouped vector
register (if any) into its individual vector registers when emitting CFI
directives in the prologue.

Fixed https://github.com/llvm/llvm-project/issues/94500
---
 llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 44 +++++++++++++++++---
 llvm/test/CodeGen/RISCV/rvv-cfi-info.ll      | 16 +++++--
 2 files changed, 50 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index e676c2f94583d..23acea987d2c7 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -53,6 +53,35 @@ static const std::pair<MCPhysReg, int8_t> FixedCSRFIMap[] = {
     {/*s9*/ RISCV::X25, -11}, {/*s10*/ RISCV::X26, -12},
     {/*s11*/ RISCV::X27, -13}};
 
+// This function returns {Base VReg, corresponding LMUL} of callee-saved VReg.
+// For example:
+// V2M2 -> {RISCV::V2, 2}
+// V8   -> {RISCV::V8, 1}
+static std::pair<MCPhysReg, int8_t> getCSBaseVRegLMULPair(MCPhysReg VR) {
+  assert(((VR >= RISCV::V1 && VR <= RISCV::V7) ||
+          (VR >= RISCV::V24 && VR <= RISCV::V31) ||
+          (VR >= RISCV::V2M2 && VR <= RISCV::V6M2) ||
+          (VR >= RISCV::V24M2 && VR <= RISCV::V30M2)) &&
+         "Invalid VR");
+
+  static constexpr std::pair<MCPhysReg, int8_t> VRegLMULLUT[] = {
+      {/*V2M2*/ RISCV::V2, 2},   {/*V4M2*/ RISCV::V4, 2},
+      {/*V4M4*/ RISCV::V4, 4},   {/*V6M2*/ RISCV::V6, 2},
+      {/*V24M2*/ RISCV::V24, 2}, {/*V24M4*/ RISCV::V24, 4},
+      {/*V24M8*/ RISCV::V24, 8}, {/*V26M2*/ RISCV::V26, 2},
+      {/*V28M2*/ RISCV::V28, 2}, {/*V28M4*/ RISCV::V28, 4},
+      {/*V30M2*/ RISCV::V30, 2}};
+
+  if ((VR >= RISCV::V1 && VR <= RISCV::V7) ||
+      (VR >= RISCV::V24 && VR <= RISCV::V31))
+    return std::make_pair(VR, 1);
+
+  if (VR >= RISCV::V2M2 && VR <= RISCV::V6M2)
+    return VRegLMULLUT[VR - RISCV::V2M2];
+
+  return VRegLMULLUT[VR - RISCV::V24M2 + 4];
+}
+
 // For now we use x3, a.k.a gp, as pointer to shadow call stack.
 // User should not use x3 in their asm.
 static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -1554,12 +1583,15 @@ void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
     // Insert the spill to the stack frame.
     int FI = CS.getFrameIdx();
     if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) {
-      unsigned CFIIndex = MF->addFrameInst(
-          createDefCFAOffset(*STI.getRegisterInfo(), CS.getReg(), -FixedSize,
-                             MFI.getObjectOffset(FI) / 8));
-      BuildMI(MBB, MI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
-          .addCFIIndex(CFIIndex)
-          .setMIFlag(MachineInstr::FrameSetup);
+      auto VRegLMULPair = getCSBaseVRegLMULPair(CS.getReg());
+      for (int i = 0; i < VRegLMULPair.second; ++i) {
+        unsigned CFIIndex = MF->addFrameInst(
+            createDefCFAOffset(*STI.getRegisterInfo(), VRegLMULPair.first + i,
+                               -FixedSize, MFI.getObjectOffset(FI) / 8 + i));
+        BuildMI(MBB, MI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
+            .addCFIIndex(CFIIndex)
+            .setMIFlag(MachineInstr::FrameSetup);
+      }
     }
   }
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll
index c99388cbdaf44..93fe66695b70e 100644
--- a/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll
+++ b/llvm/test/CodeGen/RISCV/rvv-cfi-info.ll
@@ -27,8 +27,12 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
 ; OMIT-FP-NEXT:    addi a0, sp, 16
 ; OMIT-FP-NEXT:    vs4r.v v4, (a0) # Unknown-size Folded Spill
 ; OMIT-FP-NEXT:    .cfi_escape 0x10, 0x61, 0x08, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 2 * vlenb
-; OMIT-FP-NEXT:    .cfi_escape 0x10, 0x62, 0x08, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2m2 @ cfa - 4 * vlenb
-; OMIT-FP-NEXT:    .cfi_escape 0x10, 0x64, 0x08, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4m4 @ cfa - 8 * vlenb
+; OMIT-FP-NEXT:    .cfi_escape 0x10, 0x62, 0x08, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 4 * vlenb
+; OMIT-FP-NEXT:    .cfi_escape 0x10, 0x63, 0x08, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 3 * vlenb
+; OMIT-FP-NEXT:    .cfi_escape 0x10, 0x64, 0x08, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 8 * vlenb
+; OMIT-FP-NEXT:    .cfi_escape 0x10, 0x65, 0x08, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 7 * vlenb
+; OMIT-FP-NEXT:    .cfi_escape 0x10, 0x66, 0x08, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 6 * vlenb
+; OMIT-FP-NEXT:    .cfi_escape 0x10, 0x67, 0x08, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 5 * vlenb
 ; OMIT-FP-NEXT:    #APP
 ; OMIT-FP-NEXT:    #NO_APP
 ; OMIT-FP-NEXT:    csrr a0, vlenb
@@ -79,8 +83,12 @@ define riscv_vector_cc <vscale x 1 x i32> @test_vector_callee_cfi(<vscale x 1 x
 ; NO-OMIT-FP-NEXT:    addi a0, a0, -32
 ; NO-OMIT-FP-NEXT:    vs4r.v v4, (a0) # Unknown-size Folded Spill
 ; NO-OMIT-FP-NEXT:    .cfi_escape 0x10, 0x61, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v1 @ cfa - 32 - 2 * vlenb
-; NO-OMIT-FP-NEXT:    .cfi_escape 0x10, 0x62, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2m2 @ cfa - 32 - 4 * vlenb
-; NO-OMIT-FP-NEXT:    .cfi_escape 0x10, 0x64, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4m4 @ cfa - 32 - 8 * vlenb
+; NO-OMIT-FP-NEXT:    .cfi_escape 0x10, 0x62, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7c, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v2 @ cfa - 32 - 4 * vlenb
+; NO-OMIT-FP-NEXT:    .cfi_escape 0x10, 0x63, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7d, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v3 @ cfa - 32 - 3 * vlenb
+; NO-OMIT-FP-NEXT:    .cfi_escape 0x10, 0x64, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x78, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v4 @ cfa - 32 - 8 * vlenb
+; NO-OMIT-FP-NEXT:    .cfi_escape 0x10, 0x65, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x79, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v5 @ cfa - 32 - 7 * vlenb
+; NO-OMIT-FP-NEXT:    .cfi_escape 0x10, 0x66, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7a, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v6 @ cfa - 32 - 6 * vlenb
+; NO-OMIT-FP-NEXT:    .cfi_escape 0x10, 0x67, 0x0b, 0x11, 0x60, 0x22, 0x11, 0x7b, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # $v7 @ cfa - 32 - 5 * vlenb
 ; NO-OMIT-FP-NEXT:    #APP
 ; NO-OMIT-FP-NEXT:    #NO_APP
 ; NO-OMIT-FP-NEXT:    csrr a0, vlenb

>From 3f3a27ae803ff6799396a4671d02348786a0ae19 Mon Sep 17 00:00:00 2001
From: Brandon Wu <brandon.wu at sifive.com>
Date: Wed, 24 Jul 2024 21:53:35 -0700
Subject: [PATCH 2/2] fixup! [RISCV][CFI] Emit cfi_offset for every
 callee-saved vector registers

---
 llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 47 ++++++--------------
 1 file changed, 13 insertions(+), 34 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 23acea987d2c7..7abd5a49a1b5f 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -53,35 +53,6 @@ static const std::pair<MCPhysReg, int8_t> FixedCSRFIMap[] = {
     {/*s9*/ RISCV::X25, -11}, {/*s10*/ RISCV::X26, -12},
     {/*s11*/ RISCV::X27, -13}};
 
-// This function returns {Base VReg, corresponding LMUL} of callee-saved VReg.
-// For example:
-// V2M2 -> {RISCV::V2, 2}
-// V8   -> {RISCV::V8, 1}
-static std::pair<MCPhysReg, int8_t> getCSBaseVRegLMULPair(MCPhysReg VR) {
-  assert(((VR >= RISCV::V1 && VR <= RISCV::V7) ||
-          (VR >= RISCV::V24 && VR <= RISCV::V31) ||
-          (VR >= RISCV::V2M2 && VR <= RISCV::V6M2) ||
-          (VR >= RISCV::V24M2 && VR <= RISCV::V30M2)) &&
-         "Invalid VR");
-
-  static constexpr std::pair<MCPhysReg, int8_t> VRegLMULLUT[] = {
-      {/*V2M2*/ RISCV::V2, 2},   {/*V4M2*/ RISCV::V4, 2},
-      {/*V4M4*/ RISCV::V4, 4},   {/*V6M2*/ RISCV::V6, 2},
-      {/*V24M2*/ RISCV::V24, 2}, {/*V24M4*/ RISCV::V24, 4},
-      {/*V24M8*/ RISCV::V24, 8}, {/*V26M2*/ RISCV::V26, 2},
-      {/*V28M2*/ RISCV::V28, 2}, {/*V28M4*/ RISCV::V28, 4},
-      {/*V30M2*/ RISCV::V30, 2}};
-
-  if ((VR >= RISCV::V1 && VR <= RISCV::V7) ||
-      (VR >= RISCV::V24 && VR <= RISCV::V31))
-    return std::make_pair(VR, 1);
-
-  if (VR >= RISCV::V2M2 && VR <= RISCV::V6M2)
-    return VRegLMULLUT[VR - RISCV::V2M2];
-
-  return VRegLMULLUT[VR - RISCV::V24M2 + 4];
-}
-
 // For now we use x3, a.k.a gp, as pointer to shadow call stack.
 // User should not use x3 in their asm.
 static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB,
@@ -1564,6 +1535,7 @@ void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
   const MachineFrameInfo &MFI = MF->getFrameInfo();
   RISCVMachineFunctionInfo *RVFI = MF->getInfo<RISCVMachineFunctionInfo>();
   const TargetInstrInfo &TII = *STI.getInstrInfo();
+  const RISCVRegisterInfo &TRI = *STI.getRegisterInfo();
   DebugLoc DL = MBB.findDebugLoc(MI);
 
   const auto &RVVCSI = getRVVCalleeSavedInfo(*MF, MFI.getCalleeSavedInfo());
@@ -1583,11 +1555,18 @@ void RISCVFrameLowering::emitCalleeSavedRVVPrologCFI(
     // Insert the spill to the stack frame.
     int FI = CS.getFrameIdx();
     if (FI >= 0 && MFI.getStackID(FI) == TargetStackID::ScalableVector) {
-      auto VRegLMULPair = getCSBaseVRegLMULPair(CS.getReg());
-      for (int i = 0; i < VRegLMULPair.second; ++i) {
-        unsigned CFIIndex = MF->addFrameInst(
-            createDefCFAOffset(*STI.getRegisterInfo(), VRegLMULPair.first + i,
-                               -FixedSize, MFI.getObjectOffset(FI) / 8 + i));
+      MCRegister BaseReg = TRI.getSubReg(CS.getReg(), RISCV::sub_vrm1_0);
+      // If it's not a grouped vector register, it has no subregisters, so
+      // the base register is just the register itself.
+      if (BaseReg == RISCV::NoRegister)
+        BaseReg = CS.getReg();
+      unsigned NumRegs = RISCV::VRRegClass.contains(CS.getReg())     ? 1
+                         : RISCV::VRM2RegClass.contains(CS.getReg()) ? 2
+                         : RISCV::VRM4RegClass.contains(CS.getReg()) ? 4
+                                                                     : 8;
+      for (unsigned i = 0; i < NumRegs; ++i) {
+        unsigned CFIIndex = MF->addFrameInst(createDefCFAOffset(
+            TRI, BaseReg + i, -FixedSize, MFI.getObjectOffset(FI) / 8 + i));
         BuildMI(MBB, MI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
             .addCFIIndex(CFIIndex)
             .setMIFlag(MachineInstr::FrameSetup);



More information about the llvm-commits mailing list