[llvm] [RISCV] Fix v[f]slide1down.vx having VL changed (PR #106110)

Mon Aug 26 11:00:28 PDT 2024

https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/106110

v[f]slide1down.vx uses VL to determine where the scalar element is inserted (it is placed at index VL-1), so changing VL changes the result.

This patch fixes the problem by setting ActiveElementsAffectResult on these instructions, which is overly conservative. We should relax this later by modelling that it's ok to change the mask, just not VL.

Fixes #106109


>From 23cc3d1bb10b480ac825a15efc52141aec8355fd Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 27 Aug 2024 01:50:54 +0800
Subject: [PATCH 1/2] Precommit test

---
 llvm/test/CodeGen/RISCV/rvv/pr106109.ll | 15 +++++++++++++++
 1 file changed, 15 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/pr106109.ll

diff --git a/llvm/test/CodeGen/RISCV/rvv/pr106109.ll b/llvm/test/CodeGen/RISCV/rvv/pr106109.ll
new file mode 100644
index 00000000000000..d39115a8f4f999
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr106109.ll
@@ -0,0 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s
+
+define <vscale x 4 x float> @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %false, float %1, <vscale x 4 x i1> %mask) {
+; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
+; CHECK-NEXT:    vfslide1down.vf v10, v8, fa0, v0.t
+; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(<vscale x 4 x float> undef, <vscale x 4 x float> %0, float %1, i64 4)
+  %b = call <vscale x 4 x float> @llvm.riscv.vmerge.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x float> %false, <vscale x 4 x float> %a, <vscale x 4 x i1> %mask, i64 1)
+  ret <vscale x 4 x float> %b
+}

>From 9d96a1d9120976f595ffbb406ecae1081a33ca20 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 27 Aug 2024 01:55:34 +0800
Subject: [PATCH 2/2] [RISCV] Fix v[f]slide1down.vx having VL changed

v[f]slide1down.vx uses VL to determine where the scalar element is inserted (it is placed at index VL-1), so changing VL changes the result.

This patch fixes the problem by setting ActiveElementsAffectResult on these instructions, which is overly conservative. We should relax this later by modelling that it's ok to change the mask, just not VL.

Fixes #106109
---
 llvm/lib/Target/RISCV/RISCVInstrInfoV.td                | 2 ++
 llvm/test/CodeGen/RISCV/rvv/pr106109.ll                 | 7 ++++---
 llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll | 7 +++++--
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
index a84e92b0fda262..878859fc1d0864 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoV.td
@@ -1665,6 +1665,7 @@ defm VSLIDEUP_V : VSLD_IV_X_I<"vslideup", 0b001110, /*slidesUp=*/true>;
 defm VSLIDE1UP_V : VSLD1_MV_X<"vslide1up", 0b001110>;
 } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
 defm VSLIDEDOWN_V : VSLD_IV_X_I<"vslidedown", 0b001111, /*slidesUp=*/false>;
+let ActiveElementsAffectResult = 1 in
 defm VSLIDE1DOWN_V : VSLD1_MV_X<"vslide1down", 0b001111>;
 } // Predicates = [HasVInstructions]
 
@@ -1672,6 +1673,7 @@ let Predicates = [HasVInstructionsAnyF] in {
 let Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp in {
 defm VFSLIDE1UP_V : VSLD1_FV_F<"vfslide1up", 0b001110>;
 } // Constraints = "@earlyclobber $vd", RVVConstraint = SlideUp
+let ActiveElementsAffectResult = 1 in
 defm VFSLIDE1DOWN_V : VSLD1_FV_F<"vfslide1down", 0b001111>;
 } // Predicates = [HasVInstructionsAnyF]
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr106109.ll b/llvm/test/CodeGen/RISCV/rvv/pr106109.ll
index d39115a8f4f999..683753e19f2d5a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/pr106109.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/pr106109.ll
@@ -4,9 +4,10 @@
 define <vscale x 4 x float> @intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32(<vscale x 4 x float> %0, <vscale x 4 x float> %false, float %1, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: intrinsic_vfslide1down_vf_nxv4f32_nxv4f32_f32:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, mu
-; CHECK-NEXT:    vfslide1down.vf v10, v8, fa0, v0.t
-; CHECK-NEXT:    vmv.v.v v8, v10
+; CHECK-NEXT:    vsetivli zero, 4, e32, m2, ta, ma
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vmerge.vvm v8, v10, v8, v0
 ; CHECK-NEXT:    ret
 entry:
   %a = call <vscale x 4 x float> @llvm.riscv.vfslide1down.nxv4f32.f32(<vscale x 4 x float> undef, <vscale x 4 x float> %0, float %1, i64 4)
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 6700920cebff0a..2ff775d6d14f43 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -776,12 +776,15 @@ define <vscale x 2 x i32> @vpselect_vslide1up(<vscale x 2 x i32> %passthru, <vsc
   ret <vscale x 2 x i32> %b
 }
 
+; FIXME: We can still fold this given that the vmerge and the vslide1down have
+; the same vl.
 declare <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32>, <vscale x 2 x i32>, i32, i64)
 define <vscale x 2 x i32> @vpselect_vslide1down(<vscale x 2 x i32> %passthru, <vscale x 2 x i32> %v, i32 %x, <vscale x 2 x i1> %m, i32 zeroext %vl) {
 ; CHECK-LABEL: vpselect_vslide1down:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, mu
-; CHECK-NEXT:    vslide1down.vx v8, v9, a0, v0.t
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vslide1down.vx v9, v9, a0
+; CHECK-NEXT:    vmerge.vvm v8, v8, v9, v0
 ; CHECK-NEXT:    ret
   %1 = zext i32 %vl to i64
   %a = call <vscale x 2 x i32> @llvm.riscv.vslide1down.nxv2i32.i32(<vscale x 2 x i32> undef, <vscale x 2 x i32> %v, i32 %x, i64 %1)