[llvm] [RISCV] Fix double counting CSRs with Zcmp in RISCVFrameLowering::getFrameIndexReference. (PR #117207)

Thu Nov 21 10:36:32 PST 2024

https://github.com/topperc created https://github.com/llvm/llvm-project/pull/117207

The Zcmp callee-saved registers are already accounted for in
getCalleeSavedStackSize(). Subtracting RVPushStackSize as well subtracts
them a second time, leading to incorrect stack offsets during frame
index elimination.
    
The RVPushStackSize subtraction should have been removed in
0de2b26942f890a6ec84cd75ac7abe3f6f2b2e37 when Zcmp handling was changed.
Prior to that commit, RVPushStackSize was not included in
getCalleeSavedStackSize(). The commit message at the time noted that
Zcmp+RVV was likely broken.

>From bfc6ec9052c8596638757b19b2843aa5e36ad721 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 21 Nov 2024 09:48:39 -0800
Subject: [PATCH 1/2] [RISCV] Add test case for Zcmp+vector stack layout issue.
 NFC

---
 .../RISCV/rvv/rv32-spill-vector-csr.ll        | 29 +++++++++++++++++++
 .../RISCV/rvv/rv64-spill-vector-csr.ll        | 29 +++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
index ac74a82e79e6d0..26bd6b5b5df333 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -3,6 +3,8 @@
 ; RUN:    | FileCheck --check-prefix=SPILL-O0 %s
 ; RUN: llc -mtriple=riscv32 -mattr=+v,+d -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
+; RUN: llc -mtriple=riscv32 -mattr=+v,+d,+zcmp -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2-ZCMP %s
 
 @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
 
@@ -82,6 +84,33 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
 ; SPILL-O2-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
 ; SPILL-O2-NEXT:    addi sp, sp, 32
 ; SPILL-O2-NEXT:    ret
+;
+; SPILL-O2-ZCMP-LABEL: foo:
+; SPILL-O2-ZCMP:       # %bb.0:
+; SPILL-O2-ZCMP-NEXT:    cm.push {ra, s0}, -32
+; SPILL-O2-ZCMP-NEXT:    csrr a1, vlenb
+; SPILL-O2-ZCMP-NEXT:    slli a1, a1, 1
+; SPILL-O2-ZCMP-NEXT:    sub sp, sp, a1
+; SPILL-O2-ZCMP-NEXT:    mv s0, a0
+; SPILL-O2-ZCMP-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O2-ZCMP-NEXT:    vfadd.vv v9, v8, v9
+; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
+; SPILL-O2-ZCMP-NEXT:    add a0, a0, sp
+; SPILL-O2-ZCMP-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-NEXT:    lui a0, %hi(.L.str)
+; SPILL-O2-ZCMP-NEXT:    addi a0, a0, %lo(.L.str)
+; SPILL-O2-ZCMP-NEXT:    call puts
+; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
+; SPILL-O2-ZCMP-NEXT:    add a0, a0, sp
+; SPILL-O2-ZCMP-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-NEXT:    vl1r.v v9, (sp) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
+; SPILL-O2-ZCMP-NEXT:    vfadd.vv v8, v9, v8
+; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
+; SPILL-O2-ZCMP-NEXT:    slli a0, a0, 1
+; SPILL-O2-ZCMP-NEXT:    add sp, sp, a0
+; SPILL-O2-ZCMP-NEXT:    cm.popret {ra, s0}, 32
 {
    %x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i32 7, i32 %gvl)
    %call = call signext i32 @puts(ptr @.str)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
index 9054048f2f747a..6e2ba78ab868a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -5,6 +5,8 @@
 ; RUN:    | FileCheck --check-prefix=SPILL-O2 %s
 ; RUN: llc -mtriple=riscv64 -mattr=+v,+d -mattr=+d -riscv-v-vector-bits-max=128 -O2 < %s \
 ; RUN:    | FileCheck --check-prefix=SPILL-O2-VLEN128 %s
+; RUN: llc -mtriple=riscv64 -mattr=+v,+d,+zcmp -O2 < %s \
+; RUN:    | FileCheck --check-prefix=SPILL-O2-ZCMP %s
 
 
 @.str = private unnamed_addr constant [6 x i8] c"hello\00", align 1
@@ -113,6 +115,33 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
 ; SPILL-O2-VLEN128-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
 ; SPILL-O2-VLEN128-NEXT:    addi sp, sp, 32
 ; SPILL-O2-VLEN128-NEXT:    ret
+;
+; SPILL-O2-ZCMP-LABEL: foo:
+; SPILL-O2-ZCMP:       # %bb.0:
+; SPILL-O2-ZCMP-NEXT:    cm.push {ra, s0}, -32
+; SPILL-O2-ZCMP-NEXT:    csrr a1, vlenb
+; SPILL-O2-ZCMP-NEXT:    slli a1, a1, 1
+; SPILL-O2-ZCMP-NEXT:    sub sp, sp, a1
+; SPILL-O2-ZCMP-NEXT:    mv s0, a0
+; SPILL-O2-ZCMP-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; SPILL-O2-ZCMP-NEXT:    vfadd.vv v9, v8, v9
+; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
+; SPILL-O2-ZCMP-NEXT:    add a0, a0, sp
+; SPILL-O2-ZCMP-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-NEXT:    lui a0, %hi(.L.str)
+; SPILL-O2-ZCMP-NEXT:    addi a0, a0, %lo(.L.str)
+; SPILL-O2-ZCMP-NEXT:    call puts
+; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
+; SPILL-O2-ZCMP-NEXT:    add a0, a0, sp
+; SPILL-O2-ZCMP-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-NEXT:    vl1r.v v9, (sp) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
+; SPILL-O2-ZCMP-NEXT:    vfadd.vv v8, v9, v8
+; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
+; SPILL-O2-ZCMP-NEXT:    slli a0, a0, 1
+; SPILL-O2-ZCMP-NEXT:    add sp, sp, a0
+; SPILL-O2-ZCMP-NEXT:    cm.popret {ra, s0}, 32
 {
    %x = call <vscale x 1 x double> @llvm.riscv.vfadd.nxv1f64.nxv1f64(<vscale x 1 x double> undef, <vscale x 1 x double> %a, <vscale x 1 x double> %b, i64 7, i64 %gvl)
    %call = call signext i32 @puts(ptr @.str)

>From 8a871bca4673f7481d8366cbe820869868afc230 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper at sifive.com>
Date: Thu, 21 Nov 2024 09:51:46 -0800
Subject: [PATCH 2/2] [RISCV] Fix double counting CSRs with Zcmp in
 RISCVFrameLowering::getFrameIndexReference.

The Zcmp callee-saved registers are already accounted for in
getCalleeSavedStackSize(). Subtracting RVPushStackSize as well subtracts
them a second time, leading to incorrect stack offsets during frame
index elimination.

The RVPushStackSize subtraction should have been removed in
0de2b26942f890a6ec84cd75ac7abe3f6f2b2e37 when Zcmp handling was changed.
Prior to that commit, RVPushStackSize was not included in
getCalleeSavedStackSize(). The commit message at the time noted that
Zcmp+RVV was likely broken.
---
 llvm/lib/Target/RISCV/RISCVFrameLowering.cpp         | 1 -
 llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll | 8 ++++++--
 llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll | 8 ++++++--
 3 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
index 1d91d46cb30ee0..f0bc74e331db46 100644
--- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp
@@ -1205,7 +1205,6 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
     // alignment padding.
     int ScalarLocalVarSize = MFI.getStackSize() -
                              RVFI->getCalleeSavedStackSize() -
-                             RVFI->getRVPushStackSize() -
                              RVFI->getVarArgsSaveSize() + RVFI->getRVVPadding();
     Offset += StackOffset::get(ScalarLocalVarSize, RVFI->getRVVStackSize());
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
index 26bd6b5b5df333..aef160049106b9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv32-spill-vector-csr.ll
@@ -92,19 +92,23 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
 ; SPILL-O2-ZCMP-NEXT:    slli a1, a1, 1
 ; SPILL-O2-ZCMP-NEXT:    sub sp, sp, a1
 ; SPILL-O2-ZCMP-NEXT:    mv s0, a0
-; SPILL-O2-ZCMP-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-NEXT:    addi a1, sp, 16
+; SPILL-O2-ZCMP-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
 ; SPILL-O2-ZCMP-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; SPILL-O2-ZCMP-NEXT:    vfadd.vv v9, v8, v9
 ; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
 ; SPILL-O2-ZCMP-NEXT:    add a0, a0, sp
+; SPILL-O2-ZCMP-NEXT:    addi a0, a0, 16
 ; SPILL-O2-ZCMP-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-ZCMP-NEXT:    lui a0, %hi(.L.str)
 ; SPILL-O2-ZCMP-NEXT:    addi a0, a0, %lo(.L.str)
 ; SPILL-O2-ZCMP-NEXT:    call puts
 ; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
 ; SPILL-O2-ZCMP-NEXT:    add a0, a0, sp
+; SPILL-O2-ZCMP-NEXT:    addi a0, a0, 16
 ; SPILL-O2-ZCMP-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; SPILL-O2-ZCMP-NEXT:    vl1r.v v9, (sp) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-NEXT:    addi a0, sp, 16
+; SPILL-O2-ZCMP-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-ZCMP-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
 ; SPILL-O2-ZCMP-NEXT:    vfadd.vv v8, v9, v8
 ; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
diff --git a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
index 6e2ba78ab868a7..c7c44fb0e12158 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rv64-spill-vector-csr.ll
@@ -123,19 +123,23 @@ define <vscale x 1 x double> @foo(<vscale x 1 x double> %a, <vscale x 1 x double
 ; SPILL-O2-ZCMP-NEXT:    slli a1, a1, 1
 ; SPILL-O2-ZCMP-NEXT:    sub sp, sp, a1
 ; SPILL-O2-ZCMP-NEXT:    mv s0, a0
-; SPILL-O2-ZCMP-NEXT:    vs1r.v v8, (sp) # Unknown-size Folded Spill
+; SPILL-O2-ZCMP-NEXT:    addi a1, sp, 16
+; SPILL-O2-ZCMP-NEXT:    vs1r.v v8, (a1) # Unknown-size Folded Spill
 ; SPILL-O2-ZCMP-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
 ; SPILL-O2-ZCMP-NEXT:    vfadd.vv v9, v8, v9
 ; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
 ; SPILL-O2-ZCMP-NEXT:    add a0, a0, sp
+; SPILL-O2-ZCMP-NEXT:    addi a0, a0, 16
 ; SPILL-O2-ZCMP-NEXT:    vs1r.v v9, (a0) # Unknown-size Folded Spill
 ; SPILL-O2-ZCMP-NEXT:    lui a0, %hi(.L.str)
 ; SPILL-O2-ZCMP-NEXT:    addi a0, a0, %lo(.L.str)
 ; SPILL-O2-ZCMP-NEXT:    call puts
 ; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb
 ; SPILL-O2-ZCMP-NEXT:    add a0, a0, sp
+; SPILL-O2-ZCMP-NEXT:    addi a0, a0, 16
 ; SPILL-O2-ZCMP-NEXT:    vl1r.v v8, (a0) # Unknown-size Folded Reload
-; SPILL-O2-ZCMP-NEXT:    vl1r.v v9, (sp) # Unknown-size Folded Reload
+; SPILL-O2-ZCMP-NEXT:    addi a0, sp, 16
+; SPILL-O2-ZCMP-NEXT:    vl1r.v v9, (a0) # Unknown-size Folded Reload
 ; SPILL-O2-ZCMP-NEXT:    vsetvli zero, s0, e64, m1, ta, ma
 ; SPILL-O2-ZCMP-NEXT:    vfadd.vv v8, v9, v8
 ; SPILL-O2-ZCMP-NEXT:    csrr a0, vlenb