[llvm] [RISCV] Rematerialize vmv.v.i (PR #107550)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 6 02:36:45 PDT 2024
https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/107550
This continues the line of work started in #97520, and gives a 2.5% reduction in the number of spills on SPEC CPU 2017.
Program              regalloc.NumSpills          regalloc.NumReloads         regalloc.NumReMaterialization
                     lhs       rhs       diff    lhs       rhs       diff    lhs       rhs       diff
605.mcf_s            141.00    141.00     0.0%   372.00    372.00     0.0%   123.00    123.00     0.0%
505.mcf_r            141.00    141.00     0.0%   372.00    372.00     0.0%   123.00    123.00     0.0%
519.lbm_r            73.00     73.00      0.0%   75.00     75.00      0.0%   18.00     18.00      0.0%
619.lbm_s            68.00     68.00      0.0%   70.00     70.00      0.0%   20.00     20.00      0.0%
631.deepsjeng_s      354.00    353.00    -0.3%   683.00    682.00    -0.1%   529.00    530.00     0.2%
531.deepsjeng_r      354.00    353.00    -0.3%   683.00    682.00    -0.1%   529.00    530.00     0.2%
625.x264_s           1896.00   1886.00   -0.5%   4583.00   4561.00   -0.5%   2086.00   2108.00    1.1%
525.x264_r           1896.00   1886.00   -0.5%   4583.00   4561.00   -0.5%   2086.00   2108.00    1.1%
508.namd_r           6665.00   6598.00   -1.0%   15649.00  15509.00  -0.9%   3014.00   3164.00    5.0%
644.nab_s            761.00    753.00    -1.1%   1199.00   1183.00   -1.3%   1542.00   1559.00    1.1%
544.nab_r            761.00    753.00    -1.1%   1199.00   1183.00   -1.3%   1542.00   1559.00    1.1%
638.imagick_s        4287.00   4181.00   -2.5%   11624.00  11342.00  -2.4%   10551.00  10884.00   3.2%
538.imagick_r        4287.00   4181.00   -2.5%   11624.00  11342.00  -2.4%   10551.00  10884.00   3.2%
602.gcc_s            12771.00  12450.00  -2.5%   28117.00  27328.00  -2.8%   49757.00  50526.00   1.5%
502.gcc_r            12771.00  12450.00  -2.5%   28117.00  27328.00  -2.8%   49757.00  50526.00   1.5%
Geomean difference                       -2.5%                       -2.6%                        1.8%
I initially held off on submitting this patch because it surprisingly introduced a lot of spills in the test diffs, but after #107290 the vmv.v.i instructions that caused them are now gone.
The gist is that marking vmv.v.i as rematerializable decreased its spill weight, which actually resulted in more m8 registers getting evicted and spilled during register allocation.
The SPEC results show this isn't an issue in practice, though, and I plan to post a separate patch explaining this in more detail.
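For intuition, here is a minimal, self-contained C++ sketch of the eviction effect described above. It is not the LLVM implementation; the Interval struct, the names, and the frequencies are made up for illustration. It only shows that once an interval's spill weight drops (LLVM's spill-weight calculation roughly halves the weight of trivially rematerializable intervals), the allocator prefers it as the eviction/spill candidate over a neighbouring live value, which is how the extra m8 evictions mentioned above could arise.

#include <algorithm>
#include <cstdio>
#include <vector>

// Toy model of a live interval competing for a register.
struct Interval {
  const char *Name;
  float UseFreq;         // summed frequency of its uses/defs
  bool Rematerializable; // e.g. a vmv.v.i with an undef passthru
  float spillWeight() const {
    float W = UseFreq;
    if (Rematerializable)
      W *= 0.5f; // cheaper to drop: it can be recomputed instead of reloaded
    return W;
  }
};

int main() {
  std::vector<Interval> Live = {{"vmv.v.i splat (m8)", 10.0f, true},
                                {"long-lived m8 value", 8.0f, false}};
  // The allocator evicts/spills the interval with the smallest weight first.
  auto Victim = std::min_element(
      Live.begin(), Live.end(), [](const Interval &A, const Interval &B) {
        return A.spillWeight() < B.spillWeight();
      });
  std::printf("preferred victim: %s (weight %.1f)\n", Victim->Name,
              Victim->spillWeight());
  return 0;
}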
From 5b23e09eef2ae7ce8cb778751b476b0d9160146c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 6 Sep 2024 16:17:34 +0800
Subject: [PATCH 1/2] Precommit test
---
llvm/test/CodeGen/RISCV/rvv/remat.ll | 45 ++++++++++++++++++++++++++++
1 file changed, 45 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index d7a8a13dd36643..54c7ea67941c94 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -109,3 +109,48 @@ define void @vid_passthru(ptr %p, <vscale x 8 x i64> %v) {
store volatile <vscale x 8 x i64> %vid, ptr %p
ret void
}
+
+define void @vmv.v.i(ptr %p) {
+; CHECK-LABEL: vmv.v.i:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi sp, sp, -16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: sub sp, sp, a1
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 1
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: addi a1, sp, 16
+; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re64.v v24, (a0)
+; CHECK-NEXT: vl8re64.v v0, (a0)
+; CHECK-NEXT: vl8re64.v v16, (a0)
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vs8r.v v0, (a0)
+; CHECK-NEXT: vs8r.v v24, (a0)
+; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; CHECK-NEXT: vs8r.v v16, (a0)
+; CHECK-NEXT: vs8r.v v8, (a0)
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add sp, sp, a0
+; CHECK-NEXT: addi sp, sp, 16
+; CHECK-NEXT: ret
+ %vmv.v.i = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 1, i64 -1)
+ store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
+
+ %a = load volatile <vscale x 8 x i64>, ptr %p
+ %b = load volatile <vscale x 8 x i64>, ptr %p
+ %c = load volatile <vscale x 8 x i64>, ptr %p
+ %d = load volatile <vscale x 8 x i64>, ptr %p
+ store volatile <vscale x 8 x i64> %d, ptr %p
+ store volatile <vscale x 8 x i64> %c, ptr %p
+ store volatile <vscale x 8 x i64> %b, ptr %p
+ store volatile <vscale x 8 x i64> %a, ptr %p
+
+ store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
+ ret void
+}
From 76d252b877fe3deeebf6a8111deb9f2fc6250d35 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 6 Sep 2024 16:18:32 +0800
Subject: [PATCH 2/2] [RISCV] Rematerialize vmv.v.i
This continues the line of work started in #97520, and gives a 2.5% reduction in the number of spills on SPEC CPU 2017.
Program              regalloc.NumSpills          regalloc.NumReloads         regalloc.NumReMaterialization
                     lhs       rhs       diff    lhs       rhs       diff    lhs       rhs       diff
605.mcf_s            141.00    141.00     0.0%   372.00    372.00     0.0%   123.00    123.00     0.0%
505.mcf_r            141.00    141.00     0.0%   372.00    372.00     0.0%   123.00    123.00     0.0%
519.lbm_r            73.00     73.00      0.0%   75.00     75.00      0.0%   18.00     18.00      0.0%
619.lbm_s            68.00     68.00      0.0%   70.00     70.00      0.0%   20.00     20.00      0.0%
631.deepsjeng_s      354.00    353.00    -0.3%   683.00    682.00    -0.1%   529.00    530.00     0.2%
531.deepsjeng_r      354.00    353.00    -0.3%   683.00    682.00    -0.1%   529.00    530.00     0.2%
625.x264_s           1896.00   1886.00   -0.5%   4583.00   4561.00   -0.5%   2086.00   2108.00    1.1%
525.x264_r           1896.00   1886.00   -0.5%   4583.00   4561.00   -0.5%   2086.00   2108.00    1.1%
508.namd_r           6665.00   6598.00   -1.0%   15649.00  15509.00  -0.9%   3014.00   3164.00    5.0%
644.nab_s            761.00    753.00    -1.1%   1199.00   1183.00   -1.3%   1542.00   1559.00    1.1%
544.nab_r            761.00    753.00    -1.1%   1199.00   1183.00   -1.3%   1542.00   1559.00    1.1%
638.imagick_s        4287.00   4181.00   -2.5%   11624.00  11342.00  -2.4%   10551.00  10884.00   3.2%
538.imagick_r        4287.00   4181.00   -2.5%   11624.00  11342.00  -2.4%   10551.00  10884.00   3.2%
602.gcc_s            12771.00  12450.00  -2.5%   28117.00  27328.00  -2.8%   49757.00  50526.00   1.5%
502.gcc_r            12771.00  12450.00  -2.5%   28117.00  27328.00  -2.8%   49757.00  50526.00   1.5%
Geomean difference                       -2.5%                       -2.6%                        1.8%
I initially held off on submitting this patch because it surprisingly introduced a lot of spills in the test diffs, but after #107290 the vmv.v.i instructions that caused them are now gone.
The gist is that marking vmv.v.i as rematerializable decreased its spill weight, which actually resulted in more m8 registers getting evicted and spilled during register allocation.
The SPEC results show this isn't an issue in practice, though, and I plan to post a separate patch explaining this in more detail.
---
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 20 +++--
.../Target/RISCV/RISCVInstrInfoVPseudos.td | 1 +
llvm/test/CodeGen/RISCV/rvv/remat.ll | 73 ++++++++++++-------
llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll | 1 +
4 files changed, 60 insertions(+), 35 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 0a64a8e1440084..325a50c9f48a1c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -168,13 +168,19 @@ Register RISCVInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
bool RISCVInstrInfo::isReallyTriviallyReMaterializable(
const MachineInstr &MI) const {
- if (RISCV::getRVVMCOpcode(MI.getOpcode()) == RISCV::VID_V &&
- MI.getOperand(1).isUndef() &&
- /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl and
- vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
- i.e. -riscv-vsetvl-after-rvv-regalloc=true */
- !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
- return true;
+ switch (RISCV::getRVVMCOpcode(MI.getOpcode())) {
+ case RISCV::VMV_V_I:
+ case RISCV::VID_V:
+ if (MI.getOperand(1).isUndef() &&
+ /* After RISCVInsertVSETVLI most pseudos will have implicit uses on vl
+ and vtype. Make sure we only rematerialize before RISCVInsertVSETVLI
+ i.e. -riscv-vsetvl-after-rvv-regalloc=true */
+ !MI.hasRegisterImplicitUseOperand(RISCV::VTYPE))
+ return true;
+ break;
+ default:
+ break;
+ }
return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index c91c9c3614a34c..e11f176bfe6041 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -2478,6 +2478,7 @@ multiclass VPseudoUnaryVMV_V_X_I {
def "_X_" # mx : VPseudoUnaryNoMask<m.vrclass, GPR>,
SchedUnary<"WriteVIMovX", "ReadVIMovX", mx,
forcePassthruRead=true>;
+ let isReMaterializable = 1 in
def "_I_" # mx : VPseudoUnaryNoMask<m.vrclass, simm5>,
SchedNullary<"WriteVIMovI", mx,
forcePassthruRead=true>;
diff --git a/llvm/test/CodeGen/RISCV/rvv/remat.ll b/llvm/test/CodeGen/RISCV/rvv/remat.ll
index 54c7ea67941c94..2b12249378eb1f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/remat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/remat.ll
@@ -111,34 +111,51 @@ define void @vid_passthru(ptr %p, <vscale x 8 x i64> %v) {
}
define void @vmv.v.i(ptr %p) {
-; CHECK-LABEL: vmv.v.i:
-; CHECK: # %bb.0:
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
-; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
-; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 1
-; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: addi a1, sp, 16
-; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; CHECK-NEXT: vl8re64.v v24, (a0)
-; CHECK-NEXT: vl8re64.v v0, (a0)
-; CHECK-NEXT: vl8re64.v v16, (a0)
-; CHECK-NEXT: vs8r.v v16, (a0)
-; CHECK-NEXT: vs8r.v v0, (a0)
-; CHECK-NEXT: vs8r.v v24, (a0)
-; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
-; CHECK-NEXT: vs8r.v v16, (a0)
-; CHECK-NEXT: vs8r.v v8, (a0)
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
-; CHECK-NEXT: add sp, sp, a0
-; CHECK-NEXT: addi sp, sp, 16
-; CHECK-NEXT: ret
+; POSTRA-LABEL: vmv.v.i:
+; POSTRA: # %bb.0:
+; POSTRA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; POSTRA-NEXT: vmv.v.i v8, 1
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: vl8re64.v v16, (a0)
+; POSTRA-NEXT: vl8re64.v v24, (a0)
+; POSTRA-NEXT: vl8re64.v v0, (a0)
+; POSTRA-NEXT: vl8re64.v v8, (a0)
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: vs8r.v v0, (a0)
+; POSTRA-NEXT: vs8r.v v24, (a0)
+; POSTRA-NEXT: vs8r.v v16, (a0)
+; POSTRA-NEXT: vmv.v.i v8, 1
+; POSTRA-NEXT: vs8r.v v8, (a0)
+; POSTRA-NEXT: ret
+;
+; PRERA-LABEL: vmv.v.i:
+; PRERA: # %bb.0:
+; PRERA-NEXT: addi sp, sp, -16
+; PRERA-NEXT: .cfi_def_cfa_offset 16
+; PRERA-NEXT: csrr a1, vlenb
+; PRERA-NEXT: slli a1, a1, 3
+; PRERA-NEXT: sub sp, sp, a1
+; PRERA-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; PRERA-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; PRERA-NEXT: vmv.v.i v8, 1
+; PRERA-NEXT: vs8r.v v8, (a0)
+; PRERA-NEXT: vl8re64.v v16, (a0)
+; PRERA-NEXT: addi a1, sp, 16
+; PRERA-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; PRERA-NEXT: vl8re64.v v24, (a0)
+; PRERA-NEXT: vl8re64.v v0, (a0)
+; PRERA-NEXT: vl8re64.v v16, (a0)
+; PRERA-NEXT: vs8r.v v16, (a0)
+; PRERA-NEXT: vs8r.v v0, (a0)
+; PRERA-NEXT: vs8r.v v24, (a0)
+; PRERA-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
+; PRERA-NEXT: vs8r.v v16, (a0)
+; PRERA-NEXT: vs8r.v v8, (a0)
+; PRERA-NEXT: csrr a0, vlenb
+; PRERA-NEXT: slli a0, a0, 3
+; PRERA-NEXT: add sp, sp, a0
+; PRERA-NEXT: addi sp, sp, 16
+; PRERA-NEXT: ret
%vmv.v.i = call <vscale x 8 x i64> @llvm.riscv.vmv.v.x.nxv8i64(<vscale x 8 x i64> poison, i64 1, i64 -1)
store volatile <vscale x 8 x i64> %vmv.v.i, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
index b1980fcf420a82..2a3a3a3daae4c4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll
@@ -519,6 +519,7 @@ define void @vselect_legalize_regression(<vscale x 16 x double> %a, <vscale x 16
; CHECK-NEXT: vmv.v.i v24, 0
; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0
; CHECK-NEXT: vmv1r.v v0, v7
+; CHECK-NEXT: vmv.v.i v24, 0
; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0
; CHECK-NEXT: vs8r.v v8, (a1)
; CHECK-NEXT: slli a0, a0, 3