[llvm] [RISCV] Fix wrong offset use caused by missing the size of Zcmp push. (PR #66613)

Wed Sep 20 00:45:05 PDT 2023

https://github.com/yetingk updated https://github.com/llvm/llvm-project/pull/66613

>From fc864b51c7d2c9f5cfd8df72aa36714de5359e44 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 18 Sep 2023 11:39:26 +0800
Subject: [PATCH 1/2] [RISCV] Precommit test.

---
 llvm/test/CodeGen/RISCV/push-pop-popret.ll | 120 +++++++++++++++++++++
 llvm/test/CodeGen/RISCV/zcmp-with-float.ll |  43 ++++++++
 2 files changed, 163 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
index ffa7cb6389d5211..8915b313e050e80 100644
--- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll
+++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
@@ -3122,3 +3122,123 @@ define void @callee_no_irq() nounwind{
   store volatile [32 x i32] %val, [32 x i32]* @var_test_irq
   ret void
 }
+
+declare void @bar(ptr, ptr)
+declare ptr @llvm.frameaddress.p0(i32 immarg)
+
+define i32 @use_fp(i32 %x) {
+; RV32IZCMP-LABEL: use_fp:
+; RV32IZCMP:       # %bb.0: # %entry
+; RV32IZCMP-NEXT:    cm.push {ra, s0-s1}, -32
+; RV32IZCMP-NEXT:    .cfi_def_cfa_offset 32
+; RV32IZCMP-NEXT:    .cfi_offset ra, -12
+; RV32IZCMP-NEXT:    .cfi_offset s0, -8
+; RV32IZCMP-NEXT:    .cfi_offset s1, -4
+; RV32IZCMP-NEXT:    addi s0, sp, 32
+; RV32IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV32IZCMP-NEXT:    mv s1, a0
+; RV32IZCMP-NEXT:    addi a1, s0, -4
+; RV32IZCMP-NEXT:    mv a0, s0
+; RV32IZCMP-NEXT:    call bar@plt
+; RV32IZCMP-NEXT:    mv a0, s1
+; RV32IZCMP-NEXT:    cm.popret {ra, s0-s1}, 32
+;
+; RV64IZCMP-LABEL: use_fp:
+; RV64IZCMP:       # %bb.0: # %entry
+; RV64IZCMP-NEXT:    cm.push {ra, s0-s1}, -48
+; RV64IZCMP-NEXT:    .cfi_def_cfa_offset 48
+; RV64IZCMP-NEXT:    .cfi_offset ra, -24
+; RV64IZCMP-NEXT:    .cfi_offset s0, -16
+; RV64IZCMP-NEXT:    .cfi_offset s1, -8
+; RV64IZCMP-NEXT:    addi s0, sp, 48
+; RV64IZCMP-NEXT:    .cfi_def_cfa s0, 0
+; RV64IZCMP-NEXT:    mv s1, a0
+; RV64IZCMP-NEXT:    addi a1, s0, -4
+; RV64IZCMP-NEXT:    mv a0, s0
+; RV64IZCMP-NEXT:    call bar@plt
+; RV64IZCMP-NEXT:    mv a0, s1
+; RV64IZCMP-NEXT:    cm.popret {ra, s0-s1}, 48
+;
+; RV32IZCMP-SR-LABEL: use_fp:
+; RV32IZCMP-SR:       # %bb.0: # %entry
+; RV32IZCMP-SR-NEXT:    cm.push {ra, s0-s1}, -32
+; RV32IZCMP-SR-NEXT:    .cfi_def_cfa_offset 32
+; RV32IZCMP-SR-NEXT:    .cfi_offset ra, -12
+; RV32IZCMP-SR-NEXT:    .cfi_offset s0, -8
+; RV32IZCMP-SR-NEXT:    .cfi_offset s1, -4
+; RV32IZCMP-SR-NEXT:    addi s0, sp, 32
+; RV32IZCMP-SR-NEXT:    .cfi_def_cfa s0, 0
+; RV32IZCMP-SR-NEXT:    mv s1, a0
+; RV32IZCMP-SR-NEXT:    addi a1, s0, -4
+; RV32IZCMP-SR-NEXT:    mv a0, s0
+; RV32IZCMP-SR-NEXT:    call bar@plt
+; RV32IZCMP-SR-NEXT:    mv a0, s1
+; RV32IZCMP-SR-NEXT:    cm.popret {ra, s0-s1}, 32
+;
+; RV64IZCMP-SR-LABEL: use_fp:
+; RV64IZCMP-SR:       # %bb.0: # %entry
+; RV64IZCMP-SR-NEXT:    cm.push {ra, s0-s1}, -48
+; RV64IZCMP-SR-NEXT:    .cfi_def_cfa_offset 48
+; RV64IZCMP-SR-NEXT:    .cfi_offset ra, -24
+; RV64IZCMP-SR-NEXT:    .cfi_offset s0, -16
+; RV64IZCMP-SR-NEXT:    .cfi_offset s1, -8
+; RV64IZCMP-SR-NEXT:    addi s0, sp, 48
+; RV64IZCMP-SR-NEXT:    .cfi_def_cfa s0, 0
+; RV64IZCMP-SR-NEXT:    mv s1, a0
+; RV64IZCMP-SR-NEXT:    addi a1, s0, -4
+; RV64IZCMP-SR-NEXT:    mv a0, s0
+; RV64IZCMP-SR-NEXT:    call bar@plt
+; RV64IZCMP-SR-NEXT:    mv a0, s1
+; RV64IZCMP-SR-NEXT:    cm.popret {ra, s0-s1}, 48
+;
+; RV32I-LABEL: use_fp:
+; RV32I:       # %bb.0: # %entry
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    .cfi_offset ra, -4
+; RV32I-NEXT:    .cfi_offset s0, -8
+; RV32I-NEXT:    .cfi_offset s1, -12
+; RV32I-NEXT:    addi s0, sp, 16
+; RV32I-NEXT:    .cfi_def_cfa s0, 0
+; RV32I-NEXT:    mv s1, a0
+; RV32I-NEXT:    addi a1, s0, -16
+; RV32I-NEXT:    mv a0, s0
+; RV32I-NEXT:    call bar@plt
+; RV32I-NEXT:    mv a0, s1
+; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
+; RV32I-NEXT:    ret
+;
+; RV64I-LABEL: use_fp:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi sp, sp, -32
+; RV64I-NEXT:    .cfi_def_cfa_offset 32
+; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
+; RV64I-NEXT:    .cfi_offset ra, -8
+; RV64I-NEXT:    .cfi_offset s0, -16
+; RV64I-NEXT:    .cfi_offset s1, -24
+; RV64I-NEXT:    addi s0, sp, 32
+; RV64I-NEXT:    .cfi_def_cfa s0, 0
+; RV64I-NEXT:    mv s1, a0
+; RV64I-NEXT:    addi a1, s0, -28
+; RV64I-NEXT:    mv a0, s0
+; RV64I-NEXT:    call bar@plt
+; RV64I-NEXT:    mv a0, s1
+; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
+; RV64I-NEXT:    addi sp, sp, 32
+; RV64I-NEXT:    ret
+entry:
+  %var = alloca i32, align 4
+  %0 = tail call ptr @llvm.frameaddress.p0(i32 0)
+  call void @bar(ptr %0, ptr %var)
+  ret i32 %x
+}
diff --git a/llvm/test/CodeGen/RISCV/zcmp-with-float.ll b/llvm/test/CodeGen/RISCV/zcmp-with-float.ll
index 05ee92c89db7ca2..3c5cce1ba5e5b76 100644
--- a/llvm/test/CodeGen/RISCV/zcmp-with-float.ll
+++ b/llvm/test/CodeGen/RISCV/zcmp-with-float.ll
@@ -36,3 +36,46 @@ entry:
   call void @callee()
   ret float %arg
 }
+
+define void @foo2(i32 %x, float %y) {
+; RV32-LABEL: foo2:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    cm.push {ra, s0}, -32
+; RV32-NEXT:    .cfi_def_cfa_offset 32
+; RV32-NEXT:    fsw fs0, 12(sp) # 4-byte Folded Spill
+; RV32-NEXT:    .cfi_offset ra, -8
+; RV32-NEXT:    .cfi_offset s0, -4
+; RV32-NEXT:    .cfi_offset fs0, -4
+; RV32-NEXT:    fmv.s fs0, fa0
+; RV32-NEXT:    mv s0, a0
+; RV32-NEXT:    call bar@plt
+; RV32-NEXT:    mv a0, s0
+; RV32-NEXT:    fmv.s fa0, fs0
+; RV32-NEXT:    flw fs0, 12(sp) # 4-byte Folded Reload
+; RV32-NEXT:    cm.pop {ra, s0}, 32
+; RV32-NEXT:    tail func@plt
+;
+; RV64-LABEL: foo2:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    cm.push {ra, s0}, -32
+; RV64-NEXT:    .cfi_def_cfa_offset 32
+; RV64-NEXT:    fsw fs0, 12(sp) # 4-byte Folded Spill
+; RV64-NEXT:    .cfi_offset ra, -16
+; RV64-NEXT:    .cfi_offset s0, -8
+; RV64-NEXT:    .cfi_offset fs0, -4
+; RV64-NEXT:    fmv.s fs0, fa0
+; RV64-NEXT:    mv s0, a0
+; RV64-NEXT:    call bar@plt
+; RV64-NEXT:    mv a0, s0
+; RV64-NEXT:    fmv.s fa0, fs0
+; RV64-NEXT:    flw fs0, 12(sp) # 4-byte Folded Reload
+; RV64-NEXT:    cm.pop {ra, s0}, 32
+; RV64-NEXT:    tail func@plt
+entry:
+  tail call void @bar()
+  tail call void @func(i32 %x, float %y)
+  ret void
+}
+
+declare void @bar()
+declare void @func(i32, float)

>From ae1d5fe9fa241ff1a60d2853a092ada0bb901c87 Mon Sep 17 00:00:00 2001
From: Yeting Kuo <yeting.kuo at sifive.com>
Date: Mon, 18 Sep 2023 13:08:24 +0800
Subject: [PATCH 2/2] [RISCV] Fix wrong offset use caused by missing the size
 of Zcmp push.

This fixes two wrong offset uses:
1. .cfi_offset of callee saves that are not pushed by cm.push.
2. References to frame objects through the frame pointer.
---
 llvm/lib/Target/RISCV/RISCVFrameLowering.cpp     | 16 ++++++----------
 llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h |  4 ++++
 llvm/test/CodeGen/RISCV/push-pop-popret.ll       |  8 ++++----
 llvm/test/CodeGen/RISCV/zcmp-with-float.ll       |  8 ++++----
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 50e98e6b8ea99a0..d873da7d684cfe2 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -506,8 +506,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
   // FIXME (note copied from Lanai): This appears to be overallocating.  Needs
   // investigation. Get the number of bytes to allocate from the FrameInfo.
   uint64_t StackSize = getStackSizeWithRVVPadding(MF);
-  uint64_t RealStackSize =
-      StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize();
+  uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize();
   uint64_t RVVStackSize = RVFI->getRVVStackSize();
 
   // Early exit if there is no need to allocate on the stack
@@ -575,8 +574,7 @@ void RISCVFrameLowering::emitPrologue(MachineFunction &MF,
         Offset = FrameIdx * (int64_t)STI.getXLen() / 8;
       }
     } else {
-      Offset = MFI.getObjectOffset(FrameIdx) -
-               RVFI->getLibCallStackSize();
+      Offset = MFI.getObjectOffset(FrameIdx) - RVFI->getReservedSpillsSize();
     }
     Register Reg = Entry.getReg();
     unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
@@ -721,8 +719,7 @@ void RISCVFrameLowering::emitEpilogue(MachineFunction &MF,
     LastFrameDestroy = std::prev(MBBI, CSI.size());
 
   uint64_t StackSize = getStackSizeWithRVVPadding(MF);
-  uint64_t RealStackSize =
-      StackSize + RVFI->getLibCallStackSize() + RVFI->getRVPushStackSize();
+  uint64_t RealStackSize = StackSize + RVFI->getReservedSpillsSize();
   uint64_t FPOffset = RealStackSize - RVFI->getVarArgsSaveSize();
   uint64_t RVVStackSize = RVFI->getRVVStackSize();
 
@@ -873,7 +870,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
   if (FrameReg == getFPReg(STI)) {
     Offset += StackOffset::getFixed(RVFI->getVarArgsSaveSize());
     if (FI >= 0)
-      Offset -= StackOffset::getFixed(RVFI->getLibCallStackSize());
+      Offset -= StackOffset::getFixed(RVFI->getReservedSpillsSize());
     // When using FP to access scalable vector objects, we need to minus
     // the frame size.
     //
@@ -941,8 +938,7 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
       assert(!RI->hasStackRealignment(MF) &&
              "Can't index across variable sized realign");
       Offset += StackOffset::get(getStackSizeWithRVVPadding(MF) +
-                                     RVFI->getLibCallStackSize() +
-                                     RVFI->getRVPushStackSize(),
+                                     RVFI->getReservedSpillsSize(),
                                  RVFI->getRVVStackSize());
     } else {
       Offset += StackOffset::getFixed(MFI.getStackSize());
@@ -1287,7 +1283,7 @@ RISCVFrameLowering::getFirstSPAdjustAmount(const MachineFunction &MF) const {
   // Disable SplitSPAdjust if save-restore libcall is used. The callee-saved
   // registers will be pushed by the save-restore libcalls, so we don't have to
   // split the SP adjustment in this case.
-  if (RVFI->getLibCallStackSize() || RVFI->getRVPushStackSize())
+  if (RVFI->getReservedSpillsSize())
     return 0;
 
   // Return the FirstSPAdjustAmount if the StackSize can not fit in a signed
diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
index 6ee5790b272adbb..fcc20c17c6b4032 100644
--- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h
@@ -104,6 +104,10 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo {
     BranchRelaxationScratchFrameIndex = Index;
   }
 
+  unsigned getReservedSpillsSize() const {
+    return LibCallStackSize + RVPushStackSize;
+  }
+
   unsigned getLibCallStackSize() const { return LibCallStackSize; }
   void setLibCallStackSize(unsigned Size) { LibCallStackSize = Size; }
 
diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
index 8915b313e050e80..776944b177636c2 100644
--- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll
+++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll
@@ -3137,7 +3137,7 @@ define i32 @use_fp(i32 %x) {
 ; RV32IZCMP-NEXT:    addi s0, sp, 32
 ; RV32IZCMP-NEXT:    .cfi_def_cfa s0, 0
 ; RV32IZCMP-NEXT:    mv s1, a0
-; RV32IZCMP-NEXT:    addi a1, s0, -4
+; RV32IZCMP-NEXT:    addi a1, s0, -20
 ; RV32IZCMP-NEXT:    mv a0, s0
 ; RV32IZCMP-NEXT:    call bar@plt
 ; RV32IZCMP-NEXT:    mv a0, s1
@@ -3153,7 +3153,7 @@ define i32 @use_fp(i32 %x) {
 ; RV64IZCMP-NEXT:    addi s0, sp, 48
 ; RV64IZCMP-NEXT:    .cfi_def_cfa s0, 0
 ; RV64IZCMP-NEXT:    mv s1, a0
-; RV64IZCMP-NEXT:    addi a1, s0, -4
+; RV64IZCMP-NEXT:    addi a1, s0, -36
 ; RV64IZCMP-NEXT:    mv a0, s0
 ; RV64IZCMP-NEXT:    call bar@plt
 ; RV64IZCMP-NEXT:    mv a0, s1
@@ -3169,7 +3169,7 @@ define i32 @use_fp(i32 %x) {
 ; RV32IZCMP-SR-NEXT:    addi s0, sp, 32
 ; RV32IZCMP-SR-NEXT:    .cfi_def_cfa s0, 0
 ; RV32IZCMP-SR-NEXT:    mv s1, a0
-; RV32IZCMP-SR-NEXT:    addi a1, s0, -4
+; RV32IZCMP-SR-NEXT:    addi a1, s0, -20
 ; RV32IZCMP-SR-NEXT:    mv a0, s0
 ; RV32IZCMP-SR-NEXT:    call bar@plt
 ; RV32IZCMP-SR-NEXT:    mv a0, s1
@@ -3185,7 +3185,7 @@ define i32 @use_fp(i32 %x) {
 ; RV64IZCMP-SR-NEXT:    addi s0, sp, 48
 ; RV64IZCMP-SR-NEXT:    .cfi_def_cfa s0, 0
 ; RV64IZCMP-SR-NEXT:    mv s1, a0
-; RV64IZCMP-SR-NEXT:    addi a1, s0, -4
+; RV64IZCMP-SR-NEXT:    addi a1, s0, -36
 ; RV64IZCMP-SR-NEXT:    mv a0, s0
 ; RV64IZCMP-SR-NEXT:    call bar@plt
 ; RV64IZCMP-SR-NEXT:    mv a0, s1
diff --git a/llvm/test/CodeGen/RISCV/zcmp-with-float.ll b/llvm/test/CodeGen/RISCV/zcmp-with-float.ll
index 3c5cce1ba5e5b76..72213019b8c8722 100644
--- a/llvm/test/CodeGen/RISCV/zcmp-with-float.ll
+++ b/llvm/test/CodeGen/RISCV/zcmp-with-float.ll
@@ -13,7 +13,7 @@ define float @foo(float %arg) {
 ; RV32-NEXT:    .cfi_def_cfa_offset 32
 ; RV32-NEXT:    fsw fs0, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -4
-; RV32-NEXT:    .cfi_offset fs0, -4
+; RV32-NEXT:    .cfi_offset fs0, -20
 ; RV32-NEXT:    fmv.s fs0, fa0
 ; RV32-NEXT:    call callee@plt
 ; RV32-NEXT:    fmv.s fa0, fs0
@@ -26,7 +26,7 @@ define float @foo(float %arg) {
 ; RV64-NEXT:    .cfi_def_cfa_offset 32
 ; RV64-NEXT:    fsw fs0, 12(sp) # 4-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -8
-; RV64-NEXT:    .cfi_offset fs0, -4
+; RV64-NEXT:    .cfi_offset fs0, -20
 ; RV64-NEXT:    fmv.s fs0, fa0
 ; RV64-NEXT:    call callee@plt
 ; RV64-NEXT:    fmv.s fa0, fs0
@@ -45,7 +45,7 @@ define void @foo2(i32 %x, float %y) {
 ; RV32-NEXT:    fsw fs0, 12(sp) # 4-byte Folded Spill
 ; RV32-NEXT:    .cfi_offset ra, -8
 ; RV32-NEXT:    .cfi_offset s0, -4
-; RV32-NEXT:    .cfi_offset fs0, -4
+; RV32-NEXT:    .cfi_offset fs0, -20
 ; RV32-NEXT:    fmv.s fs0, fa0
 ; RV32-NEXT:    mv s0, a0
 ; RV32-NEXT:    call bar@plt
@@ -62,7 +62,7 @@ define void @foo2(i32 %x, float %y) {
 ; RV64-NEXT:    fsw fs0, 12(sp) # 4-byte Folded Spill
 ; RV64-NEXT:    .cfi_offset ra, -16
 ; RV64-NEXT:    .cfi_offset s0, -8
-; RV64-NEXT:    .cfi_offset fs0, -4
+; RV64-NEXT:    .cfi_offset fs0, -20
 ; RV64-NEXT:    fmv.s fs0, fa0
 ; RV64-NEXT:    mv s0, a0
 ; RV64-NEXT:    call bar@plt