[llvm] [RISCV] Remove hasSideEffects=1 for vsetvli pseudos (PR #91319)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 7 03:52:53 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
<details>
<summary>Changes</summary>
In a similar vein to #90049, it looks like we currently model all of the effects of a vsetvli pseudo:
* VL and VTYPE are marked as defs
* VL-preserving x0,x0 vsetvlis don't get emitted until RISCVInsertVSETVLI, and when they are emitted they have implicit-defs on VL
* Regular vector pseudos are fully modelled too: before RISCVInsertVSETVLI they can be moved across vsetvli pseudos, because we will eventually insert vsetvlis to correct VL and VTYPE; afterwards, they carry implicit uses of VL and VTYPE.
I may be missing something, but otherwise it seems OK to remove hasSideEffects=1. This gives us some improvements, such as the sinking in vsetvli-insert-crossbb.ll.
We need to update RISCVDeadRegisterDefinitions to keep handling vsetvli pseudos, since it otherwise skips instructions that don't load, store, or have unmodelled side effects.
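For illustration, here is a condensed sketch of the resulting check in RISCVDeadRegisterDefinitions.cpp. The helper name is made up for readability; the actual change, shown in the diff below, keeps this condition inline in the pass's main loop:

```cpp
// Hypothetical helper condensed from RISCVDeadRegisterDefinitions.cpp after
// this patch. The pass only visits instructions it cannot simply treat as
// removable-if-dead, so the vsetvli pseudos must now be listed explicitly:
// once hasSideEffects is cleared they no longer report unmodelled side
// effects and would otherwise be skipped, losing the dead-rd-to-x0 rewrite.
static bool shouldSkip(const MachineInstr &MI) {
  const MCInstrDesc &Desc = MI.getDesc();
  return !Desc.mayLoad() && !Desc.mayStore() &&
         !Desc.hasUnmodeledSideEffects() &&
         MI.getOpcode() != RISCV::PseudoVSETVLI &&
         MI.getOpcode() != RISCV::PseudoVSETIVLI;
}
```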
---
Patch is 222.35 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/91319.diff
38 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp (+3-1)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll (+25-13)
- (modified) llvm/test/CodeGen/RISCV/rvv/calling-conv.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll (+40-40)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll (+77-77)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll (+19-19)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll (+10-10)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+186-186)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+33-36)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll (+28-28)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll (+3-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll (+4-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/fmaximum-vp.ll (+95-59)
- (modified) llvm/test/CodeGen/RISCV/rvv/fminimum-vp.ll (+95-59)
- (modified) llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll (+15-15)
- (modified) llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll (+15-15)
- (modified) llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll (+115-179)
- (modified) llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll (+2-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll (+89-108)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll (+112-156)
- (modified) llvm/test/CodeGen/RISCV/rvv/setcc-int-vp.ll (+20-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfma-vp.ll (+17-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll (+17-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/vpmerge-sdnode.ll (+7-7)
- (modified) llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll (+3-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (+19-20)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll (+11-9)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll (+3-2)
- (modified) llvm/test/CodeGen/RISCV/spill-fpr-scalar.ll (+6-18)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
index 5e6b7891449fe..7de48d8218f06 100644
--- a/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
+++ b/llvm/lib/Target/RISCV/RISCVDeadRegisterDefinitions.cpp
@@ -72,7 +72,9 @@ bool RISCVDeadRegisterDefinitions::runOnMachineFunction(MachineFunction &MF) {
// are reserved for HINT instructions.
const MCInstrDesc &Desc = MI.getDesc();
if (!Desc.mayLoad() && !Desc.mayStore() &&
- !Desc.hasUnmodeledSideEffects())
+ !Desc.hasUnmodeledSideEffects() &&
+ MI.getOpcode() != RISCV::PseudoVSETVLI &&
+ MI.getOpcode() != RISCV::PseudoVSETIVLI)
continue;
// For PseudoVSETVLIX0, Rd = X0 has special meaning.
if (MI.getOpcode() == RISCV::PseudoVSETVLIX0)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 4adc26f628914..317a6d7d4c52f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -6181,7 +6181,7 @@ let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Size = 0,
//===----------------------------------------------------------------------===//
// Pseudos.
-let hasSideEffects = 1, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Defs = [VL, VTYPE] in {
// Due to rs1=X0 having special meaning, we need a GPRNoX0 register class for
// the when we aren't using one of the special X0 encodings. Otherwise it could
// be accidentally be made X0 by MachineIR optimizations. To satisfy the
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
index 187f758b78020..0a7fa38b0c8ab 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll
@@ -236,11 +236,12 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 4
+; CHECK-NEXT: li a3, 24
+; CHECK-NEXT: mul a1, a1, a3
; CHECK-NEXT: sub sp, sp, a1
-; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; CHECK-NEXT: csrr a1, vlenb
-; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: slli a1, a1, 4
; CHECK-NEXT: add a1, sp, a1
; CHECK-NEXT: addi a1, a1, 16
; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
@@ -248,29 +249,40 @@ define fastcc <vscale x 32 x i32> @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 3
; CHECK-NEXT: add a3, a2, a1
-; CHECK-NEXT: vl8re32.v v8, (a3)
-; CHECK-NEXT: addi a3, sp, 16
-; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill
; CHECK-NEXT: add a1, a0, a1
-; CHECK-NEXT: vl8re32.v v0, (a0)
; CHECK-NEXT: vl8re32.v v8, (a1)
-; CHECK-NEXT: vl8re32.v v16, (a2)
+; CHECK-NEXT: csrr a1, vlenb
+; CHECK-NEXT: slli a1, a1, 3
+; CHECK-NEXT: add a1, sp, a1
+; CHECK-NEXT: addi a1, a1, 16
+; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re32.v v0, (a0)
; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma
+; CHECK-NEXT: vl8re32.v v8, (a3)
+; CHECK-NEXT: addi a0, sp, 16
+; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill
+; CHECK-NEXT: vl8re32.v v16, (a2)
; CHECK-NEXT: vadd.vv v0, v24, v0
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: slli a0, a0, 4
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vv v8, v24, v8
+; CHECK-NEXT: csrr a0, vlenb
+; CHECK-NEXT: slli a0, a0, 3
+; CHECK-NEXT: add a0, sp, a0
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vadd.vv v24, v24, v8
; CHECK-NEXT: addi a0, sp, 16
-; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; CHECK-NEXT: vadd.vv v8, v8, v24
+; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload
+; CHECK-NEXT: vadd.vv v8, v24, v8
; CHECK-NEXT: vadd.vv v24, v0, v16
; CHECK-NEXT: vadd.vx v16, v8, a4
; CHECK-NEXT: vadd.vx v8, v24, a4
; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 4
+; CHECK-NEXT: li a1, 24
+; CHECK-NEXT: mul a0, a0, a1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
index 647d3158b6167..fa62143546df6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv.ll
@@ -39,11 +39,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV32-NEXT: vs8r.v v8, (a0)
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
-; RV32-NEXT: add a0, a0, a1
-; RV32-NEXT: vs8r.v v16, (a0)
+; RV32-NEXT: add a1, a0, a1
; RV32-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; RV32-NEXT: vmv.v.i v8, 0
; RV32-NEXT: addi a0, sp, 128
+; RV32-NEXT: vs8r.v v16, (a1)
; RV32-NEXT: vmv.v.i v16, 0
; RV32-NEXT: call callee_scalable_vector_split_indirect
; RV32-NEXT: addi sp, s0, -144
@@ -70,11 +70,11 @@ define <vscale x 32 x i32> @caller_scalable_vector_split_indirect(<vscale x 32 x
; RV64-NEXT: vs8r.v v8, (a0)
; RV64-NEXT: csrr a1, vlenb
; RV64-NEXT: slli a1, a1, 3
-; RV64-NEXT: add a0, a0, a1
-; RV64-NEXT: vs8r.v v16, (a0)
+; RV64-NEXT: add a1, a0, a1
; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma
; RV64-NEXT: vmv.v.i v8, 0
; RV64-NEXT: addi a0, sp, 128
+; RV64-NEXT: vs8r.v v16, (a1)
; RV64-NEXT: vmv.v.i v16, 0
; RV64-NEXT: call callee_scalable_vector_split_indirect
; RV64-NEXT: addi sp, s0, -144
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll
index 1b50214bbf164..9e9a8b8a4b644 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll
@@ -19,9 +19,9 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x half> %r
@@ -42,9 +42,9 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x half> %r
@@ -65,9 +65,9 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x half> %r
@@ -88,9 +88,9 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x half> %r
@@ -112,9 +112,9 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <32 x half> %r
@@ -135,9 +135,9 @@ define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x float> %r
@@ -158,9 +158,9 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x float> %r
@@ -181,9 +181,9 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x float> %r
@@ -204,9 +204,9 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <16 x float> %r
@@ -227,9 +227,9 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <2 x double> %r
@@ -250,9 +250,9 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <4 x double> %r
@@ -273,9 +273,9 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp {
; CHECK-NEXT: frflags a0
; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t
-; CHECK-NEXT: fsflags a0
; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu
; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t
+; CHECK-NEXT: fsflags a0
; CHECK-NEXT: ret
%r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <8 x double> %r
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index a8e4af2d7368e..6320b07125bb0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -359,13 +359,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: feq.d a0, fa3, fa3
; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV32-NEXT: fld fa2, 40(sp)
; RV32-NEXT: fcvt.w.d a2, fa3, rtz
+; RV32-NEXT: fld fa3, 40(sp)
; RV32-NEXT: neg a0, a0
; RV32-NEXT: and a0, a0, a2
-; RV32-NEXT: feq.d a2, fa2, fa2
-; RV32-NEXT: fmax.d fa3, fa2, fa5
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV32-NEXT: feq.d a2, fa3, fa3
+; RV32-NEXT: fmax.d fa3, fa3, fa5
; RV32-NEXT: fmin.d fa3, fa3, fa4
; RV32-NEXT: fcvt.w.d a3, fa3, rtz
; RV32-NEXT: fld fa3, 32(sp)
@@ -460,13 +460,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: feq.d a0, fa3, fa3
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
-; RV64-NEXT: fld fa2, 40(sp)
; RV64-NEXT: fcvt.l.d a2, fa3, rtz
+; RV64-NEXT: fld fa3, 40(sp)
; RV64-NEXT: neg a0, a0
; RV64-NEXT: and a0, a0, a2
-; RV64-NEXT: feq.d a2, fa2, fa2
-; RV64-NEXT: fmax.d fa3, fa2, fa5
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; RV64-NEXT: feq.d a2, fa3, fa3
+; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a3, fa3, rtz
; RV64-NEXT: fld fa3, 32(sp)
@@ -557,7 +557,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: vslidedown.vi v8, v8, 3
; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: fld fa2, 40(sp)
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
@@ -566,9 +565,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fmin.d fa2, fa2, fa5
; RV32-NEXT: fcvt.wu.d a2, fa2, rtz
; RV32-NEXT: fmax.d fa4, fa4, fa3
-; RV32-NEXT: fld fa2, 48(sp)
; RV32-NEXT: fmin.d fa4, fa4, fa5
+; RV32-NEXT: fld fa2, 48(sp)
; RV32-NEXT: fcvt.wu.d a3, fa4, rtz
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV32-NEXT: vslide1down.vx v8, v10, a0
; RV32-NEXT: fmax.d fa4, fa2, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
@@ -633,7 +633,6 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: vslidedown.vi v8, v8, 3
; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: fmax.d fa4, fa4, fa3
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: fld fa2, 40(sp)
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
@@ -642,9 +641,10 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: fmin.d fa2, fa2, fa5
; RV64-NEXT: fcvt.lu.d a2, fa2, rtz
; RV64-NEXT: fmax.d fa4, fa4, fa3
-; RV64-NEXT: fld fa2, 48(sp)
; RV64-NEXT: fmin.d fa4, fa4, fa5
+; RV64-NEXT: fld fa2, 48(sp)
; RV64-NEXT: fcvt.lu.d a3, fa4, rtz
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; RV64-NEXT: vslide1down.vx v8, v10, a0
; RV64-NEXT: fmax.d fa4, fa2, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index 6ffa6ac250ed7..9c76b83d0974a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -132,12 +132,12 @@ define <3 x float> @si2fp_v3i1_v3f32(<3 x i1> %x) {
define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
; ZVFH32-LABEL: si2fp_v3i7_v3f32:
; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: lw a1, 4(a0)
-; ZVFH32-NEXT: lw a2, 0(a0)
-; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT: lw a1, 0(a0)
+; ZVFH32-NEXT: lw a2, 4(a0)
; ZVFH32-NEXT: lw a0, 8(a0)
-; ZVFH32-NEXT: vmv.v.x v8, a2
-; ZVFH32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT: vmv.v.x v8, a1
+; ZVFH32-NEXT: vslide1down.vx v8, v8, a2
; ZVFH32-NEXT: vslide1down.vx v8, v8, a0
; ZVFH32-NEXT: vslidedown.vi v8, v8, 1
; ZVFH32-NEXT: vadd.vv v8, v8, v8
@@ -149,12 +149,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFH64-LABEL: si2fp_v3i7_v3f32:
; ZVFH64: # %bb.0:
-; ZVFH64-NEXT: ld a1, 8(a0)
-; ZVFH64-NEXT: ld a2, 0(a0)
-; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH64-NEXT: ld a1, 0(a0)
+; ZVFH64-NEXT: ld a2, 8(a0)
; ZVFH64-NEXT: ld a0, 16(a0)
-; ZVFH64-NEXT: vmv.v.x v8, a2
-; ZVFH64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFH64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH64-NEXT: vmv.v.x v8, a1
+; ZVFH64-NEXT: vslide1down.vx v8, v8, a2
; ZVFH64-NEXT: vslide1down.vx v8, v8, a0
; ZVFH64-NEXT: vslidedown.vi v8, v8, 1
; ZVFH64-NEXT: vadd.vv v8, v8, v8
@@ -166,12 +166,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFHMIN32-LABEL: si2fp_v3i7_v3f32:
; ZVFHMIN32: # %bb.0:
-; ZVFHMIN32-NEXT: lw a1, 4(a0)
-; ZVFHMIN32-NEXT: lw a2, 0(a0)
-; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN32-NEXT: lw a1, 0(a0)
+; ZVFHMIN32-NEXT: lw a2, 4(a0)
; ZVFHMIN32-NEXT: lw a0, 8(a0)
-; ZVFHMIN32-NEXT: vmv.v.x v8, a2
-; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN32-NEXT: vmv.v.x v8, a1
+; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a2
; ZVFHMIN32-NEXT: vslide1down.vx v8, v8, a0
; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 1
; ZVFHMIN32-NEXT: vadd.vv v8, v8, v8
@@ -183,12 +183,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
;
; ZVFHMIN64-LABEL: si2fp_v3i7_v3f32:
; ZVFHMIN64: # %bb.0:
-; ZVFHMIN64-NEXT: ld a1, 8(a0)
-; ZVFHMIN64-NEXT: ld a2, 0(a0)
-; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN64-NEXT: ld a1, 0(a0)
+; ZVFHMIN64-NEXT: ld a2, 8(a0)
; ZVFHMIN64-NEXT: ld a0, 16(a0)
-; ZVFHMIN64-NEXT: vmv.v.x v8, a2
-; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a1
+; ZVFHMIN64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFHMIN64-NEXT: vmv.v.x v8, a1
+; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a2
; ZVFHMIN64-NEXT: vslide1down.vx v8, v8, a0
; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 1
; ZVFHMIN64-NEXT: vadd.vv v8, v8, v8
@@ -205,12 +205,12 @@ define <3 x float> @si2fp_v3i7_v3f32(<3 x i7> %x) {
define <3 x float> @ui2fp_v3i7_v3f32(<3 x i7> %x) {
; ZVFH32-LABEL: ui2fp_v3i7_v3f32:
; ZVFH32: # %bb.0:
-; ZVFH32-NEXT: lw a1, 4(a0)
-; ZVFH32-NEXT: lw a2, 0(a0)
-; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT: lw a1, 0(a0)
+; ZVFH32-NEXT: lw a2, 4(a0)
; ZVFH32-NEXT: lw a0, 8(a0)
-; ZVFH32-NEXT: vmv.v.x v8, a2
-; ZVFH32-NEXT: vslide1down.vx v8, v8, a1
+; ZVFH32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; ZVFH32-NEXT: vmv.v.x v8, a1
+; ZVFH32-NEXT: vslide1down.vx v8, v8, a2
; ZVFH32-NEXT: vslide1down.vx v8, v8, a0
; ZVFH32-NEXT: vslidedown.vi v8, v8, 1
; ZVFH32-NEXT: li a0, 127
@@ -222,12 +222,12 @@ define <3 x floa...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/91319
More information about the llvm-commits mailing list