[llvm] [RISCV] Start vslide1down sequence with a dependency breaking splat (PR #72691)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 17 11:19:17 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Philip Reames (preames)
Changes:
If we are using only vslide1downs to initialize an otherwise undef vector, we end up with an implicit_def as the source of the first vslide1down. That register still has to be allocated, and it creates false dependencies on surrounding code.
Instead, start the sequence with a vmv.v.x in the hope of creating a dependency-breaking idiom. Unfortunately, it's not clear this will actually work: due to the VL=0 special case for tail-agnostic operations, the hardware has to work fairly hard to recognize that the vmv.v.x has no dependence on its source register. I don't think we can reasonably expect all hardware to have optimized this case, but I also don't see any downside to preferring it.
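For illustration, here is the shape of the change for a <4 x i32> build_vector fed from scalars in a0-a3, a sketch mirroring the test updates in the diff below (e.g. add_constant_rhs_splat):

```asm
# Before: the first vslide1down reads an implicit_def source in v8, so the
# whole chain carries a false dependency on the prior value of v8.
vsetivli zero, 4, e32, m1, ta, ma
vslide1down.vx v8, v8, a0
vslide1down.vx v8, v8, a1
vslide1down.vx v8, v8, a2
vslide1down.vx v8, v8, a3

# After: start with a tail-agnostic splat of the first element, which the
# hardware can hopefully recognize as dependency breaking.
vsetivli zero, 4, e32, m1, ta, ma
vmv.v.x v8, a0
vslide1down.vx v8, v8, a1
vslide1down.vx v8, v8, a2
vslide1down.vx v8, v8, a3
```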
---
Patch is 72.19 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72691.diff
17 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+12-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll (+29-39)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll (+6-6)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll (+30-36)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll (+36-36)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll (+30-36)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll (+105-117)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll (+14-14)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+6-8)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+10-13)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll (+12-12)
- (modified) llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll (+2-2)
- (modified) llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll (+4-4)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f89f300a4e9e50c..2c7f2748472a175 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3798,13 +3798,24 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
const unsigned Policy = RISCVII::TAIL_AGNOSTIC | RISCVII::MASK_AGNOSTIC;
- SDValue Vec = DAG.getUNDEF(ContainerVT);
+ SDValue Vec;
UndefCount = 0;
for (SDValue V : Op->ops()) {
if (V.isUndef()) {
UndefCount++;
continue;
}
+
+ // Start our sequence with a TA splat in the hopes that hardware is able to
+ // recognize there's no dependency on the prior value of our temporary
+ // register.
+ if (!Vec) {
+ Vec = DAG.getSplatVector(VT, DL, V);
+ Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
+ UndefCount = 0;
+ continue;
+ }
+
if (UndefCount) {
const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT());
Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT),
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
index 47edb9eecb00bc1..bfa4c0adf6eb7a2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast.ll
@@ -495,7 +495,7 @@ define <4 x i16> @bitcast_i64_v4i16(i64 %a) {
; RV32ELEN32-LABEL: bitcast_i64_v4i16:
; RV32ELEN32: # %bb.0:
; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT: vslide1down.vx v8, v8, a0
+; RV32ELEN32-NEXT: vmv.v.x v8, a0
; RV32ELEN32-NEXT: vslide1down.vx v8, v8, a1
; RV32ELEN32-NEXT: ret
;
@@ -530,7 +530,7 @@ define <2 x i32> @bitcast_i64_v2i32(i64 %a) {
; RV32ELEN32-LABEL: bitcast_i64_v2i32:
; RV32ELEN32: # %bb.0:
; RV32ELEN32-NEXT: vsetivli zero, 2, e32, m1, ta, ma
-; RV32ELEN32-NEXT: vslide1down.vx v8, v8, a0
+; RV32ELEN32-NEXT: vmv.v.x v8, a0
; RV32ELEN32-NEXT: vslide1down.vx v8, v8, a1
; RV32ELEN32-NEXT: ret
;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index 2ffca983ac1023f..d0ff85d1179cfdb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -6,7 +6,7 @@ define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -30,7 +30,7 @@ define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
; CHECK-LABEL: add_constant_rhs_8xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslide1down.vx v8, v8, a3
@@ -67,7 +67,7 @@ define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: sub_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI2_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -91,7 +91,7 @@ define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: mul_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI3_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -115,7 +115,7 @@ define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: udiv_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -152,7 +152,7 @@ define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fadd_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -176,7 +176,7 @@ define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fdiv_constant_rhs:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -200,7 +200,7 @@ define <4 x i32> @add_constant_rhs_splat(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_splat:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: vslide1down.vx v8, v8, a1
; CHECK-NEXT: vslide1down.vx v8, v8, a2
; CHECK-NEXT: vslide1down.vx v8, v8, a3
@@ -226,7 +226,7 @@ define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d)
; RV32-NEXT: addi a3, a3, 2047
; RV32-NEXT: addi a3, a3, 308
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
@@ -239,7 +239,7 @@ define <4 x i32> @add_constant_rhs_with_identity(i32 %a, i32 %b, i32 %c, i32 %d)
; RV64-NEXT: addi a3, a3, 2047
; RV64-NEXT: addiw a3, a3, 308
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a3
@@ -263,7 +263,7 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV32-NEXT: addi a3, a3, 2047
; RV32-NEXT: addi a3, a3, 308
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
@@ -276,7 +276,7 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
; RV64-NEXT: addi a3, a3, 2047
; RV64-NEXT: addiw a3, a3, 308
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a3
@@ -293,25 +293,15 @@ define <4 x i32> @add_constant_rhs_identity(i32 %a, i32 %b, i32 %c, i32 %d) {
}
define <4 x i32> @add_constant_rhs_identity2(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs_identity2:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a0, a0, 23
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: add_constant_rhs_identity2:
-; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, 23
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: ret
+; CHECK-LABEL: add_constant_rhs_identity2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: addi a0, a0, 23
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: ret
%e0 = add i32 %a, 23
%v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
%v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
@@ -324,7 +314,7 @@ define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_inverse:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI11_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -348,7 +338,7 @@ define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: add_constant_rhs_commute:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vmv.v.x v8, a0
; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0)
; CHECK-NEXT: vle32.v v9, (a0)
@@ -377,7 +367,7 @@ define <4 x i32> @add_general_rhs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f
; RV32-NEXT: add a2, a2, a6
; RV32-NEXT: add a3, a3, a7
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
@@ -385,12 +375,12 @@ define <4 x i32> @add_general_rhs(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f
;
; RV64-LABEL: add_general_rhs:
; RV64: # %bb.0:
-; RV64-NEXT: addw a0, a0, a4
+; RV64-NEXT: add a0, a0, a4
; RV64-NEXT: addw a1, a1, a5
; RV64-NEXT: addw a2, a2, a6
; RV64-NEXT: addw a3, a3, a7
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a3
@@ -414,7 +404,7 @@ define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; RV32-NEXT: add a2, a2, a4
; RV32-NEXT: add a3, a3, a4
; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
+; RV32-NEXT: vmv.v.x v8, a0
; RV32-NEXT: vslide1down.vx v8, v8, a1
; RV32-NEXT: vslide1down.vx v8, v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a3
@@ -422,12 +412,12 @@ define <4 x i32> @add_general_splat(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
;
; RV64-LABEL: add_general_splat:
; RV64: # %bb.0:
-; RV64-NEXT: addw a0, a0, a4
+; RV64-NEXT: add a0, a0, a4
; RV64-NEXT: addw a1, a1, a4
; RV64-NEXT: addw a2, a2, a4
; RV64-NEXT: addw a3, a3, a4
; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
+; RV64-NEXT: vmv.v.x v8, a0
; RV64-NEXT: vslide1down.vx v8, v8, a1
; RV64-NEXT: vslide1down.vx v8, v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
index 2d8bae7092242d3..05aa5f9807b9fc4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll
@@ -252,7 +252,7 @@ define <2 x half> @buildvec_v2f16(half %a, half %b) {
; CHECK-LABEL: buildvec_v2f16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x half> poison, half %a, i64 0
@@ -264,7 +264,7 @@ define <2 x float> @buildvec_v2f32(float %a, float %b) {
; CHECK-LABEL: buildvec_v2f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x float> poison, float %a, i64 0
@@ -276,7 +276,7 @@ define <2 x double> @buildvec_v2f64(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x double> poison, double %a, i64 0
@@ -288,7 +288,7 @@ define <2 x double> @buildvec_v2f64_b(double %a, double %b) {
; CHECK-LABEL: buildvec_v2f64_b:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: ret
%v1 = insertelement <2 x double> poison, double %b, i64 1
@@ -300,7 +300,7 @@ define <4 x float> @buildvec_v4f32(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: buildvec_v4f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
@@ -316,7 +316,7 @@ define <8 x float> @buildvec_v8f32(float %e0, float %e1, float %e2, float %e3, f
; CHECK-LABEL: buildvec_v8f32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: vfmv.v.f v8, fa0
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
index 645b53727a059b9..ed0b15c6add5cd0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i-sat.ll
@@ -211,7 +211,7 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
; RV32-NEXT: fcvt.w.d a3, fa5, rtz
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a2
+; RV32-NEXT: vmv.v.x v8, a2
; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vse8.v v8, (a1)
; RV32-NEXT: ret
@@ -234,13 +234,13 @@ define void @fp2si_v2f64_v2i8(ptr %x, ptr %y) {
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: vfmv.f.s fa5, v8
; RV64-NEXT: feq.d a2, fa5, fa5
-; RV64-NEXT: neg a2, a2
+; RV64-NEXT: negw a2, a2
; RV64-NEXT: fmax.d fa5, fa5, fa4
; RV64-NEXT: fmin.d fa5, fa5, fa3
; RV64-NEXT: fcvt.l.d a3, fa5, rtz
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a2
+; RV64-NEXT: vmv.v.x v8, a2
; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vse8.v v8, (a1)
; RV64-NEXT: ret
@@ -256,23 +256,21 @@ define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vslidedown.vi v9, v8, 1
; RV32-NEXT: lui a0, %hi(.LCPI11_0)
; RV32-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
-; RV32-NEXT: vfmv.f.s fa4, v8
+; RV32-NEXT: vfmv.f.s fa4, v9
; RV32-NEXT: fcvt.d.w fa3, zero
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV32-NEXT: vslide1down.vx v9, v8, a0
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v8, 1
; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa5, fa4, fa5
-; RV32-NEXT: fcvt.wu.d a0, fa5, rtz
-; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v9, a0
+; RV32-NEXT: fcvt.wu.d a2, fa5, rtz
+; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV32-NEXT: vmv.v.x v8, a2
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: vse8.v v8, (a1)
; RV32-NEXT: ret
;
@@ -280,23 +278,21 @@ define void @fp2ui_v2f64_v2i8(ptr %x, ptr %y) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vslidedown.vi v9, v8, 1
; RV64-NEXT: lui a0, %hi(.LCPI11_0)
; RV64-NEXT: fld fa5, %lo(.LCPI11_0)(a0)
-; RV64-NEXT: vfmv.f.s fa4, v8
+; RV64-NEXT: vfmv.f.s fa4, v9
; RV64-NEXT: fmv.d.x fa3, zero
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa4, fa4, fa5
; RV64-NEXT: fcvt.lu.d a0, fa4, rtz
-; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
-; RV64-NEXT: vslide1down.vx v9, v8, a0
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 1
; RV64-NEXT: vfmv.f.s fa4, v8
; RV64-NEXT: fmax.d fa4, fa4, fa3
; RV64-NEXT: fmin.d fa5, fa4, fa5
-; RV64-NEXT: fcvt.lu.d a0, fa5, rtz
-; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v9, a0
+; RV64-NEXT: fcvt.lu.d a2, fa5, rtz
+; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, ma
+; RV64-NEXT: vmv.v.x v8, a2
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: vse8.v v8, (a1)
; RV64-NEXT: ret
%a = load <2 x double>, ptr %x
@@ -344,7 +340,7 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: fcvt.w.d a3, fa3, rtz
; RV32-NEXT: and a2, a2, a3
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a2
+; RV32-NEXT: vmv.v.x v10, a2
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
@@ -437,13 +433,13 @@ define void @fp2si_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: and a0, a0, a2
; RV64-NEXT: vfmv.f.s fa3, v8
; RV64-NEXT: feq.d a2, fa3, fa3
-; RV64-NEXT: neg a2, a2
+; RV64-NEXT: negw a2, a2
; RV64-NEXT: fmax.d fa3, fa3, fa5
; RV64-NEXT: fmin.d fa3, fa3, fa4
; RV64-NEXT: fcvt.l.d a3, fa3, rtz
; RV64-NEXT: and a2, a2, a3
; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a2
+; RV64-NEXT: vmv.v.x v10, a2
; RV64-NEXT: vslide1down.vx v10, v10, a0
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v12, v8, 2
@@ -529,22 +525,21 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: mv a0, sp
; RV32-NEXT: vse64.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV32-NEXT: vslidedown.vi v10, v8, 1
; RV32-NEXT: lui a0, %hi(.LCPI13_0)
; RV32-NEXT: fld fa5, %lo(.LCPI13_0)(a0)
-; RV32-NEXT: vfmv.f.s fa4, v8
+; RV32-NEXT: vfmv.f.s fa4, v10
; RV32-NEXT: fcvt.d.w fa3, zero
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
-; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a0
-; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v11, v8, 1
-; RV32-NEXT: vfmv.f.s fa4, v11
+; RV32-NEXT: vfmv.f.s fa4, v8
; RV32-NEXT: fmax.d fa4, fa4, fa3
; RV32-NEXT: fmin.d fa4, fa4, fa5
-; RV32-NEXT: fcvt.wu.d a0, fa4, rtz
+; RV32-NEXT: fcvt.wu.d a2, fa4, rtz
; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vmv.v.x v10, a2
; RV32-NEXT: vslide1down.vx v10, v10, a0
; RV32-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV32-NEXT: vslidedown.vi v12, v8, 2
@@ -604,22 +599,21 @@ define void @fp2ui_v8f64_v8i8(ptr %x, ptr %y) {
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: mv a0, sp
; RV64-NEXT: vse64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; RV64-NEXT: ...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/72691