[llvm] [RISCV] Transform build_vector((binop X_i, C_i)..) to binop (build_ve… (PR #67358)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 26 07:52:01 PDT 2023
https://github.com/preames updated https://github.com/llvm/llvm-project/pull/67358
>From 57bbd485e77809cd1a52a3b567d0d553be2695ae Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Wed, 6 Sep 2023 16:30:58 -0700
Subject: [PATCH 1/4] [RISCV] Transform build_vector((binop X_i, C_i)..) to
binop (build_vector, build_vector)
If we have a build_vector where every lane is the same kind of binop, we'd prefer a single vector binop in most cases. We do need to make sure that the two resulting build_vectors aren't more difficult to materialize than the original build_vector. To start with, let's restrict ourselves to the case where one of the build_vectors is a fully constant vector.
Note that we don't need to worry about speculation safety here. We are not speculating any of the lanes, and thus none of the typical concerns (e.g. div-by-zero) apply.
I'll highlight that the constant build_vector heuristic is just one we could choose here. We just need some way to be reasonably sure that the cost of the two build_vectors isn't going to completely outweigh the savings from forming the binop. I'm open to alternate heuristics here - both more restrictive and more permissive.
As noted in comments, we can extend this in a number of ways. I decided to start small because a) that helps keep things understandable in review, and b) it covers my actual motivating case.
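For illustration only (not part of the patch), here is a rough IR-level sketch of the shape being targeted; the function names and constants are made up for the example and only loosely follow the first test below:

  ; Before: each lane is computed with a scalar binop, then inserted.
  define <4 x i32> @before(i32 %a, i32 %b, i32 %c, i32 %d) {
    %e0 = add i32 %a, 23
    %e1 = add i32 %b, 25
    %e2 = add i32 %c, 1
    %e3 = add i32 %d, 2355
    %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
    %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
    %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
    %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
    ret <4 x i32> %v3
  }

  ; After (conceptually): one vector binop against a constant build_vector.
  define <4 x i32> @after(i32 %a, i32 %b, i32 %c, i32 %d) {
    %v0 = insertelement <4 x i32> poison, i32 %a, i32 0
    %v1 = insertelement <4 x i32> %v0, i32 %b, i32 1
    %v2 = insertelement <4 x i32> %v1, i32 %c, i32 2
    %v3 = insertelement <4 x i32> %v2, i32 %d, i32 3
    %r = add <4 x i32> %v3, <i32 23, i32 25, i32 1, i32 2355>
    ret <4 x i32> %r
  }

Keep in mind the combine itself runs on SelectionDAG BUILD_VECTOR nodes, not on IR; the sketch above is only meant to convey the before/after shape of the rewrite.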
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 57 +-
.../rvv/fixed-vectors-buildvec-of-binop.ll | 411 +++++---------
.../RISCV/rvv/fixed-vectors-vselect.ll | 513 ++++++------------
.../CodeGen/RISCV/srem-seteq-illegal-types.ll | 81 +--
.../CodeGen/RISCV/urem-seteq-illegal-types.ll | 55 +-
5 files changed, 369 insertions(+), 748 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 686350de29883aa..c40e940f1b8aeae 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1295,7 +1295,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
- ISD::CONCAT_VECTORS});
+ ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS});
if (Subtarget.hasVendorXTHeadMemPair())
setTargetDAGCombine({ISD::LOAD, ISD::STORE});
if (Subtarget.useRVVForFixedLengthVectors())
@@ -13337,6 +13337,57 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}
+/// IF we have a build_vector where each lanes is binop X, C, where C
+/// is a constant (nut not neccessarily the same constant on all lanes),
+/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
+/// We assume that materializing a constant build vector will be no more
+/// expensive that performing O(n) binops.
+static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget,
+ const RISCVTargetLowering &TLI) {
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+
+ assert(!VT.isScalableVector() && "unexpected build vector");
+
+ if (VT.getVectorNumElements() == 1)
+ return SDValue();
+
+ const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
+ if (!TLI.isBinOp(Opcode))
+ return SDValue();
+
+ if (!TLI.isOperationLegalOrCustom(Opcode, VT) ||
+ !TLI.isTypeLegal(VT))
+ return SDValue();
+
+ SmallVector<SDValue> LHSOps;
+ SmallVector<SDValue> RHSOps;
+ for (SDValue Op : N->ops()) {
+ if (Op.isUndef()) {
+ LHSOps.push_back(Op);
+ RHSOps.push_back(Op);
+ continue;
+ }
+
+ // TODO: We can handle operations which have an neutral rhs value
+ // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
+ // of profit in a more explicit manner.
+ if (Op.getOpcode() != Opcode || !Op.hasOneUse())
+ return SDValue();
+
+ LHSOps.push_back(Op.getOperand(0));
+ if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
+ !isa<ConstantFPSDNode>(Op.getOperand(1)))
+ return SDValue();
+ RHSOps.push_back(Op.getOperand(1));
+ }
+
+ return DAG.getNode(Opcode, DL, VT,
+ DAG.getBuildVector(VT, DL, LHSOps),
+ DAG.getBuildVector(VT, DL, RHSOps));
+}
+
// If we're concatenating a series of vector loads like
// concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
// Then we can turn this into a strided load by widening the vector elements
@@ -14399,6 +14450,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return Gather;
break;
}
+ case ISD::BUILD_VECTOR:
+ if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
+ return V;
+ break;
case ISD::CONCAT_VECTORS:
if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
return V;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index 37a43c3550a5282..9e4584eb17ff9a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -3,33 +3,18 @@
; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a0, a0, 23
-; RV32-NEXT: addi a1, a1, 25
-; RV32-NEXT: addi a2, a2, 1
-; RV32-NEXT: addi a3, a3, 2047
-; RV32-NEXT: addi a3, a3, 308
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: add_constant_rhs:
-; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, 23
-; RV64-NEXT: addiw a1, a1, 25
-; RV64-NEXT: addiw a2, a2, 1
-; RV64-NEXT: addiw a3, a3, 2047
-; RV64-NEXT: addiw a3, a3, 308
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: ret
+; CHECK-LABEL: add_constant_rhs:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: lui a0, %hi(.LCPI0_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0)
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
%e0 = add i32 %a, 23
%e1 = add i32 %b, 25
%e2 = add i32 %c, 1
@@ -42,49 +27,22 @@ define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
}
define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
-; RV32-LABEL: add_constant_rhs_8xi32:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a0, a0, 23
-; RV32-NEXT: addi a1, a1, 25
-; RV32-NEXT: addi a2, a2, 1
-; RV32-NEXT: addi a3, a3, 2047
-; RV32-NEXT: addi a3, a3, 308
-; RV32-NEXT: addi a4, a4, 23
-; RV32-NEXT: addi a5, a5, 23
-; RV32-NEXT: addi a6, a6, 22
-; RV32-NEXT: addi a7, a7, 23
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: vslide1down.vx v8, v8, a4
-; RV32-NEXT: vslide1down.vx v8, v8, a5
-; RV32-NEXT: vslide1down.vx v8, v8, a6
-; RV32-NEXT: vslide1down.vx v8, v8, a7
-; RV32-NEXT: ret
-;
-; RV64-LABEL: add_constant_rhs_8xi32:
-; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, 23
-; RV64-NEXT: addiw a1, a1, 25
-; RV64-NEXT: addiw a2, a2, 1
-; RV64-NEXT: addiw a3, a3, 2047
-; RV64-NEXT: addiw a3, a3, 308
-; RV64-NEXT: addiw a4, a4, 23
-; RV64-NEXT: addiw a5, a5, 23
-; RV64-NEXT: addiw a6, a6, 22
-; RV64-NEXT: addiw a7, a7, 23
-; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: vslide1down.vx v8, v8, a4
-; RV64-NEXT: vslide1down.vx v8, v8, a5
-; RV64-NEXT: vslide1down.vx v8, v8, a6
-; RV64-NEXT: vslide1down.vx v8, v8, a7
-; RV64-NEXT: ret
+; CHECK-LABEL: add_constant_rhs_8xi32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vslide1down.vx v8, v8, a4
+; CHECK-NEXT: lui a0, %hi(.LCPI1_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI1_0)
+; CHECK-NEXT: vle32.v v10, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a5
+; CHECK-NEXT: vslide1down.vx v8, v8, a6
+; CHECK-NEXT: vslide1down.vx v8, v8, a7
+; CHECK-NEXT: vadd.vv v8, v8, v10
+; CHECK-NEXT: ret
%e0 = add i32 %a, 23
%e1 = add i32 %b, 25
%e2 = add i32 %c, 1
@@ -106,33 +64,18 @@ define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: sub_constant_rhs:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a0, a0, -23
-; RV32-NEXT: addi a1, a1, -25
-; RV32-NEXT: addi a2, a2, -1
-; RV32-NEXT: addi a3, a3, -2048
-; RV32-NEXT: addi a3, a3, -307
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: sub_constant_rhs:
-; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, -23
-; RV64-NEXT: addiw a1, a1, -25
-; RV64-NEXT: addiw a2, a2, -1
-; RV64-NEXT: addiw a3, a3, -2048
-; RV64-NEXT: addiw a3, a3, -307
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: ret
+; CHECK-LABEL: sub_constant_rhs:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: lui a0, %hi(.LCPI2_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI2_0)
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: ret
%e0 = sub i32 %a, 23
%e1 = sub i32 %b, 25
%e2 = sub i32 %c, 1
@@ -145,41 +88,18 @@ define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
}
define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: mul_constant_rhs:
-; RV32: # %bb.0:
-; RV32-NEXT: li a4, 23
-; RV32-NEXT: mul a0, a0, a4
-; RV32-NEXT: li a4, 25
-; RV32-NEXT: mul a1, a1, a4
-; RV32-NEXT: li a4, 27
-; RV32-NEXT: mul a2, a2, a4
-; RV32-NEXT: lui a4, 1
-; RV32-NEXT: addi a4, a4, -1741
-; RV32-NEXT: mul a3, a3, a4
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mul_constant_rhs:
-; RV64: # %bb.0:
-; RV64-NEXT: li a4, 23
-; RV64-NEXT: mulw a0, a0, a4
-; RV64-NEXT: li a4, 25
-; RV64-NEXT: mulw a1, a1, a4
-; RV64-NEXT: li a4, 27
-; RV64-NEXT: mulw a2, a2, a4
-; RV64-NEXT: lui a4, 1
-; RV64-NEXT: addiw a4, a4, -1741
-; RV64-NEXT: mulw a3, a3, a4
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: ret
+; CHECK-LABEL: mul_constant_rhs:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: lui a0, %hi(.LCPI3_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI3_0)
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vmul.vv v8, v8, v9
+; CHECK-NEXT: ret
%e0 = mul i32 %a, 23
%e1 = mul i32 %b, 25
%e2 = mul i32 %c, 27
@@ -192,60 +112,30 @@ define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
}
define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: udiv_constant_rhs:
-; RV32: # %bb.0:
-; RV32-NEXT: lui a4, 729444
-; RV32-NEXT: addi a4, a4, 713
-; RV32-NEXT: mulhu a0, a0, a4
-; RV32-NEXT: srli a0, a0, 4
-; RV32-NEXT: lui a4, 335544
-; RV32-NEXT: addi a4, a4, 1311
-; RV32-NEXT: mulhu a1, a1, a4
-; RV32-NEXT: srli a1, a1, 3
-; RV32-NEXT: lui a4, 93703
-; RV32-NEXT: addi a4, a4, -1899
-; RV32-NEXT: mulhu a4, a3, a4
-; RV32-NEXT: sub a3, a3, a4
-; RV32-NEXT: srli a3, a3, 1
-; RV32-NEXT: add a3, a3, a4
-; RV32-NEXT: srli a3, a3, 7
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: udiv_constant_rhs:
-; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: lui a4, 729444
-; RV64-NEXT: addiw a4, a4, 713
-; RV64-NEXT: slli a4, a4, 32
-; RV64-NEXT: mulhu a0, a0, a4
-; RV64-NEXT: srli a0, a0, 36
-; RV64-NEXT: slli a1, a1, 32
-; RV64-NEXT: lui a4, 335544
-; RV64-NEXT: addiw a4, a4, 1311
-; RV64-NEXT: slli a4, a4, 32
-; RV64-NEXT: mulhu a1, a1, a4
-; RV64-NEXT: srli a1, a1, 35
-; RV64-NEXT: slli a4, a3, 32
-; RV64-NEXT: lui a5, 93703
-; RV64-NEXT: addiw a5, a5, -1899
-; RV64-NEXT: slli a5, a5, 32
-; RV64-NEXT: mulhu a4, a4, a5
-; RV64-NEXT: srli a4, a4, 32
-; RV64-NEXT: subw a3, a3, a4
-; RV64-NEXT: srliw a3, a3, 1
-; RV64-NEXT: add a3, a3, a4
-; RV64-NEXT: srli a3, a3, 7
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: ret
+; CHECK-LABEL: udiv_constant_rhs:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: lui a0, %hi(.LCPI4_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0)
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vmulhu.vv v9, v8, v9
+; CHECK-NEXT: vsub.vv v10, v8, v9
+; CHECK-NEXT: vmv.v.i v11, 0
+; CHECK-NEXT: lui a0, 524288
+; CHECK-NEXT: vslide1down.vx v11, v11, a0
+; CHECK-NEXT: lui a0, %hi(.LCPI4_1)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_1)
+; CHECK-NEXT: vle32.v v12, (a0)
+; CHECK-NEXT: vmulhu.vv v10, v10, v11
+; CHECK-NEXT: vadd.vv v9, v10, v9
+; CHECK-NEXT: vmv.v.i v0, 4
+; CHECK-NEXT: vsrl.vv v9, v9, v12
+; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT: ret
%e0 = udiv i32 %a, 23
%e1 = udiv i32 %b, 25
%e2 = udiv i32 %c, 1
@@ -261,21 +151,15 @@ define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fadd_constant_rhs:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 269184
-; CHECK-NEXT: fmv.w.x fa5, a0
-; CHECK-NEXT: fadd.s fa4, fa0, fa5
-; CHECK-NEXT: lui a0, 269440
-; CHECK-NEXT: fmv.w.x fa0, a0
-; CHECK-NEXT: fadd.s fa1, fa1, fa0
-; CHECK-NEXT: lui a0, 262144
-; CHECK-NEXT: fmv.w.x fa0, a0
-; CHECK-NEXT: fadd.s fa2, fa2, fa0
-; CHECK-NEXT: fadd.s fa5, fa3, fa5
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: lui a0, %hi(.LCPI5_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0)
+; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: vfadd.vv v8, v8, v9
; CHECK-NEXT: ret
%e0 = fadd float %a, 23.0
%e1 = fadd float %b, 25.0
@@ -291,21 +175,15 @@ define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
; CHECK-LABEL: fdiv_constant_rhs:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, 269184
-; CHECK-NEXT: fmv.w.x fa5, a0
-; CHECK-NEXT: fdiv.s fa4, fa0, fa5
-; CHECK-NEXT: lui a0, 269440
-; CHECK-NEXT: fmv.w.x fa0, a0
-; CHECK-NEXT: fdiv.s fa1, fa1, fa0
-; CHECK-NEXT: lui a0, 266752
-; CHECK-NEXT: fmv.w.x fa0, a0
-; CHECK-NEXT: fdiv.s fa2, fa2, fa0
-; CHECK-NEXT: fdiv.s fa5, fa3, fa5
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT: lui a0, %hi(.LCPI6_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0)
+; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: vfslide1down.vf v8, v8, fa1
; CHECK-NEXT: vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT: vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT: vfdiv.vv v8, v8, v9
; CHECK-NEXT: ret
%e0 = fdiv float %a, 23.0
%e1 = fdiv float %b, 25.0
@@ -319,31 +197,16 @@ define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
}
define <4 x i32> @add_constant_rhs_splat(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs_splat:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a0, a0, 23
-; RV32-NEXT: addi a1, a1, 23
-; RV32-NEXT: addi a2, a2, 23
-; RV32-NEXT: addi a3, a3, 23
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: add_constant_rhs_splat:
-; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, 23
-; RV64-NEXT: addiw a1, a1, 23
-; RV64-NEXT: addiw a2, a2, 23
-; RV64-NEXT: addiw a3, a3, 23
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: ret
+; CHECK-LABEL: add_constant_rhs_splat:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: li a0, 23
+; CHECK-NEXT: vadd.vx v8, v8, a0
+; CHECK-NEXT: ret
%e0 = add i32 %a, 23
%e1 = add i32 %b, 23
%e2 = add i32 %c, 23
@@ -458,33 +321,18 @@ define <4 x i32> @add_constant_rhs_identity2(i32 %a, i32 %b, i32 %c, i32 %d) {
}
define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs_inverse:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: addi a1, a1, 25
-; RV32-NEXT: addi a2, a2, 1
-; RV32-NEXT: addi a3, a3, 2047
-; RV32-NEXT: addi a3, a3, 308
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: add_constant_rhs_inverse:
-; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, -1
-; RV64-NEXT: addiw a1, a1, 25
-; RV64-NEXT: addiw a2, a2, 1
-; RV64-NEXT: addiw a3, a3, 2047
-; RV64-NEXT: addiw a3, a3, 308
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: ret
+; CHECK-LABEL: add_constant_rhs_inverse:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: lui a0, %hi(.LCPI11_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI11_0)
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
%e0 = sub i32 %a, 1
%e1 = add i32 %b, 25
%e2 = add i32 %c, 1
@@ -497,33 +345,18 @@ define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
}
define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs_commute:
-; RV32: # %bb.0:
-; RV32-NEXT: addi a0, a0, 23
-; RV32-NEXT: addi a1, a1, 25
-; RV32-NEXT: addi a2, a2, 1
-; RV32-NEXT: addi a3, a3, 2047
-; RV32-NEXT: addi a3, a3, 308
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: add_constant_rhs_commute:
-; RV64: # %bb.0:
-; RV64-NEXT: addiw a0, a0, 23
-; RV64-NEXT: addiw a1, a1, 25
-; RV64-NEXT: addiw a2, a2, 1
-; RV64-NEXT: addiw a3, a3, 2047
-; RV64-NEXT: addiw a3, a3, 308
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: ret
+; CHECK-LABEL: add_constant_rhs_commute:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
+; CHECK-NEXT: lui a0, %hi(.LCPI12_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0)
+; CHECK-NEXT: vle32.v v9, (a0)
+; CHECK-NEXT: vslide1down.vx v8, v8, a1
+; CHECK-NEXT: vslide1down.vx v8, v8, a2
+; CHECK-NEXT: vslide1down.vx v8, v8, a3
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: ret
%e0 = add i32 %a, 23
%e1 = add i32 %b, 25
%e2 = add i32 1, %c
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index 820e875d3ee3bb0..d27e7799a38628a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -3,65 +3,30 @@
; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vv_v6i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: lbu a2, 0(a2)
-; RV32-NEXT: vle32.v v8, (a1)
-; RV32-NEXT: andi a1, a2, 1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a1
-; RV32-NEXT: slli a1, a2, 30
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 29
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 28
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 27
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: srli a2, a2, 5
-; RV32-NEXT: vslide1down.vx v10, v10, a2
-; RV32-NEXT: vslidedown.vi v10, v10, 2
-; RV32-NEXT: vand.vi v10, v10, 1
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu
-; RV32-NEXT: vle32.v v8, (a0), v0.t
-; RV32-NEXT: vse32.v v8, (a3)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vselect_vv_v6i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: lbu a2, 0(a2)
-; RV64-NEXT: vle32.v v8, (a1)
-; RV64-NEXT: andi a1, a2, 1
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a1
-; RV64-NEXT: slli a1, a2, 62
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 61
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 60
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 59
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: srli a2, a2, 5
-; RV64-NEXT: vslide1down.vx v10, v10, a2
-; RV64-NEXT: vslidedown.vi v10, v10, 2
-; RV64-NEXT: vand.vi v10, v10, 1
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu
-; RV64-NEXT: vle32.v v8, (a0), v0.t
-; RV64-NEXT: vse32.v v8, (a3)
-; RV64-NEXT: ret
+; CHECK-LABEL: vselect_vv_v6i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: lbu a2, 0(a2)
+; CHECK-NEXT: vle32.v v8, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslide1down.vx v10, v8, a2
+; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a1, a2, 2
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a1, a2, 3
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a1, a2, 4
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a2, a2, 5
+; CHECK-NEXT: vslide1down.vx v10, v10, a2
+; CHECK-NEXT: vslidedown.vi v10, v10, 2
+; CHECK-NEXT: vand.vi v10, v10, 1
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; CHECK-NEXT: vle32.v v8, (a0), v0.t
+; CHECK-NEXT: vse32.v v8, (a3)
+; CHECK-NEXT: ret
%va = load <6 x i32>, ptr %a
%vb = load <6 x i32>, ptr %b
%vcc = load <6 x i1>, ptr %cc
@@ -71,67 +36,31 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
}
define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vx_v6i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: lbu a2, 0(a2)
-; RV32-NEXT: vle32.v v8, (a1)
-; RV32-NEXT: andi a1, a2, 1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a1
-; RV32-NEXT: slli a1, a2, 30
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 29
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 28
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 27
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: srli a2, a2, 5
-; RV32-NEXT: vslide1down.vx v10, v10, a2
-; RV32-NEXT: vslidedown.vi v10, v10, 2
-; RV32-NEXT: vand.vi v10, v10, 1
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: vse32.v v8, (a3)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vselect_vx_v6i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: lbu a2, 0(a2)
-; RV64-NEXT: vle32.v v8, (a1)
-; RV64-NEXT: andi a1, a2, 1
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a1
-; RV64-NEXT: slli a1, a2, 62
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 61
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 60
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 59
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: srli a2, a2, 5
-; RV64-NEXT: vslide1down.vx v10, v10, a2
-; RV64-NEXT: vslidedown.vi v10, v10, 2
-; RV64-NEXT: vand.vi v10, v10, 1
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: vse32.v v8, (a3)
-; RV64-NEXT: ret
+; CHECK-LABEL: vselect_vx_v6i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: lbu a2, 0(a2)
+; CHECK-NEXT: vle32.v v8, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslide1down.vx v10, v8, a2
+; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a1, a2, 2
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a1, a2, 3
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a1, a2, 4
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a2, a2, 5
+; CHECK-NEXT: vslide1down.vx v10, v10, a2
+; CHECK-NEXT: vslidedown.vi v10, v10, 2
+; CHECK-NEXT: vand.vi v10, v10, 1
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a3)
+; CHECK-NEXT: ret
%vb = load <6 x i32>, ptr %b
%ahead = insertelement <6 x i32> poison, i32 %a, i32 0
%va = shufflevector <6 x i32> %ahead, <6 x i32> poison, <6 x i32> zeroinitializer
@@ -142,67 +71,31 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
}
define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vi_v6i32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: lbu a1, 0(a1)
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: andi a0, a1, 1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a0
-; RV32-NEXT: slli a0, a1, 30
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: slli a0, a1, 29
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: slli a0, a1, 28
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: slli a0, a1, 27
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: srli a1, a1, 5
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: vslidedown.vi v10, v10, 2
-; RV32-NEXT: vand.vi v10, v10, 1
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmerge.vim v8, v8, -1, v0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: vse32.v v8, (a2)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vselect_vi_v6i32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: lbu a1, 0(a1)
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: andi a0, a1, 1
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a0
-; RV64-NEXT: slli a0, a1, 62
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: slli a0, a1, 61
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: slli a0, a1, 60
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: slli a0, a1, 59
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: srli a1, a1, 5
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: vslidedown.vi v10, v10, 2
-; RV64-NEXT: vand.vi v10, v10, 1
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmerge.vim v8, v8, -1, v0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: vse32.v v8, (a2)
-; RV64-NEXT: ret
+; CHECK-LABEL: vselect_vi_v6i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: lbu a1, 0(a1)
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslide1down.vx v10, v8, a1
+; CHECK-NEXT: srli a0, a1, 1
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a0, a1, 2
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a0, a1, 3
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a0, a1, 4
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a1, a1, 5
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: vslidedown.vi v10, v10, 2
+; CHECK-NEXT: vand.vi v10, v10, 1
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: ret
%vb = load <6 x i32>, ptr %b
%a = insertelement <6 x i32> poison, i32 -1, i32 0
%va = shufflevector <6 x i32> %a, <6 x i32> poison, <6 x i32> zeroinitializer
@@ -214,65 +107,30 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vv_v6f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: lbu a2, 0(a2)
-; RV32-NEXT: vle32.v v8, (a1)
-; RV32-NEXT: andi a1, a2, 1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a1
-; RV32-NEXT: slli a1, a2, 30
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 29
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 28
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 27
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: srli a2, a2, 5
-; RV32-NEXT: vslide1down.vx v10, v10, a2
-; RV32-NEXT: vslidedown.vi v10, v10, 2
-; RV32-NEXT: vand.vi v10, v10, 1
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, tu, mu
-; RV32-NEXT: vle32.v v8, (a0), v0.t
-; RV32-NEXT: vse32.v v8, (a3)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vselect_vv_v6f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: lbu a2, 0(a2)
-; RV64-NEXT: vle32.v v8, (a1)
-; RV64-NEXT: andi a1, a2, 1
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a1
-; RV64-NEXT: slli a1, a2, 62
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 61
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 60
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 59
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: srli a2, a2, 5
-; RV64-NEXT: vslide1down.vx v10, v10, a2
-; RV64-NEXT: vslidedown.vi v10, v10, 2
-; RV64-NEXT: vand.vi v10, v10, 1
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, tu, mu
-; RV64-NEXT: vle32.v v8, (a0), v0.t
-; RV64-NEXT: vse32.v v8, (a3)
-; RV64-NEXT: ret
+; CHECK-LABEL: vselect_vv_v6f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: lbu a2, 0(a2)
+; CHECK-NEXT: vle32.v v8, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslide1down.vx v10, v8, a2
+; CHECK-NEXT: srli a1, a2, 1
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a1, a2, 2
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a1, a2, 3
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a1, a2, 4
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: srli a2, a2, 5
+; CHECK-NEXT: vslide1down.vx v10, v10, a2
+; CHECK-NEXT: vslidedown.vi v10, v10, 2
+; CHECK-NEXT: vand.vi v10, v10, 1
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, mu
+; CHECK-NEXT: vle32.v v8, (a0), v0.t
+; CHECK-NEXT: vse32.v v8, (a3)
+; CHECK-NEXT: ret
%va = load <6 x float>, ptr %a
%vb = load <6 x float>, ptr %b
%vcc = load <6 x i1>, ptr %cc
@@ -282,67 +140,31 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
}
define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vx_v6f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: lbu a1, 0(a1)
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: andi a0, a1, 1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a0
-; RV32-NEXT: slli a0, a1, 30
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: slli a0, a1, 29
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: slli a0, a1, 28
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: slli a0, a1, 27
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: srli a1, a1, 5
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: vslidedown.vi v10, v10, 2
-; RV32-NEXT: vand.vi v10, v10, 1
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: vse32.v v8, (a2)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vselect_vx_v6f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: lbu a1, 0(a1)
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: andi a0, a1, 1
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a0
-; RV64-NEXT: slli a0, a1, 62
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: slli a0, a1, 61
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: slli a0, a1, 60
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: slli a0, a1, 59
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: srli a1, a1, 5
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: vslidedown.vi v10, v10, 2
-; RV64-NEXT: vand.vi v10, v10, 1
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: vse32.v v8, (a2)
-; RV64-NEXT: ret
+; CHECK-LABEL: vselect_vx_v6f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: lbu a1, 0(a1)
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslide1down.vx v10, v8, a1
+; CHECK-NEXT: srli a0, a1, 1
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a0, a1, 2
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a0, a1, 3
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a0, a1, 4
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a1, a1, 5
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: vslidedown.vi v10, v10, 2
+; CHECK-NEXT: vand.vi v10, v10, 1
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: ret
%vb = load <6 x float>, ptr %b
%ahead = insertelement <6 x float> poison, float %a, i32 0
%va = shufflevector <6 x float> %ahead, <6 x float> poison, <6 x i32> zeroinitializer
@@ -353,67 +175,31 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
}
define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vfpzero_v6f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: lbu a1, 0(a1)
-; RV32-NEXT: vle32.v v8, (a0)
-; RV32-NEXT: andi a0, a1, 1
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v10, v8, a0
-; RV32-NEXT: slli a0, a1, 30
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: slli a0, a1, 29
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: slli a0, a1, 28
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: slli a0, a1, 27
-; RV32-NEXT: srli a0, a0, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a0
-; RV32-NEXT: srli a1, a1, 5
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: vslidedown.vi v10, v10, 2
-; RV32-NEXT: vand.vi v10, v10, 1
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT: vmerge.vim v8, v8, 0, v0
-; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT: vse32.v v8, (a2)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vselect_vfpzero_v6f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: lbu a1, 0(a1)
-; RV64-NEXT: vle32.v v8, (a0)
-; RV64-NEXT: andi a0, a1, 1
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v10, v8, a0
-; RV64-NEXT: slli a0, a1, 62
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: slli a0, a1, 61
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: slli a0, a1, 60
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: slli a0, a1, 59
-; RV64-NEXT: srli a0, a0, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a0
-; RV64-NEXT: srli a1, a1, 5
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: vslidedown.vi v10, v10, 2
-; RV64-NEXT: vand.vi v10, v10, 1
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT: vmerge.vim v8, v8, 0, v0
-; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT: vse32.v v8, (a2)
-; RV64-NEXT: ret
+; CHECK-LABEL: vselect_vfpzero_v6f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: lbu a1, 0(a1)
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT: vslide1down.vx v10, v8, a1
+; CHECK-NEXT: srli a0, a1, 1
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a0, a1, 2
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a0, a1, 3
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a0, a1, 4
+; CHECK-NEXT: vslide1down.vx v10, v10, a0
+; CHECK-NEXT: srli a1, a1, 5
+; CHECK-NEXT: vslide1down.vx v10, v10, a1
+; CHECK-NEXT: vslidedown.vi v10, v10, 2
+; CHECK-NEXT: vand.vi v10, v10, 1
+; CHECK-NEXT: vmsne.vi v0, v10, 0
+; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a2)
+; CHECK-NEXT: ret
%vb = load <6 x float>, ptr %b
%a = insertelement <6 x float> poison, float 0.0, i32 0
%va = shufflevector <6 x float> %a, <6 x float> poison, <6 x i32> zeroinitializer
@@ -711,3 +497,6 @@ define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) {
%v = select <64 x i1> %cc, <64 x i1> %a, <64 x i1> %b
ret <64 x i1> %v
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index ee91bae6b6e0271..dcf701be76f62a9 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -725,79 +725,24 @@ define void @test_srem_vec(ptr %X) nounwind {
;
; RV64MV-LABEL: test_srem_vec:
; RV64MV: # %bb.0:
-; RV64MV-NEXT: ld a1, 0(a0)
-; RV64MV-NEXT: lwu a2, 8(a0)
-; RV64MV-NEXT: srli a3, a1, 2
-; RV64MV-NEXT: lbu a4, 12(a0)
-; RV64MV-NEXT: slli a5, a2, 62
-; RV64MV-NEXT: or a3, a5, a3
-; RV64MV-NEXT: srai a3, a3, 31
-; RV64MV-NEXT: slli a4, a4, 32
-; RV64MV-NEXT: or a2, a2, a4
-; RV64MV-NEXT: slli a2, a2, 29
-; RV64MV-NEXT: lui a4, %hi(.LCPI3_0)
-; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4)
-; RV64MV-NEXT: srai a2, a2, 31
-; RV64MV-NEXT: slli a1, a1, 31
-; RV64MV-NEXT: srai a1, a1, 31
-; RV64MV-NEXT: mulh a4, a2, a4
-; RV64MV-NEXT: srli a5, a4, 63
-; RV64MV-NEXT: srai a4, a4, 1
-; RV64MV-NEXT: add a4, a4, a5
-; RV64MV-NEXT: lui a5, %hi(.LCPI3_1)
-; RV64MV-NEXT: ld a5, %lo(.LCPI3_1)(a5)
-; RV64MV-NEXT: add a2, a2, a4
-; RV64MV-NEXT: slli a4, a4, 2
-; RV64MV-NEXT: add a2, a2, a4
-; RV64MV-NEXT: mulh a4, a3, a5
-; RV64MV-NEXT: srli a5, a4, 63
-; RV64MV-NEXT: srai a4, a4, 1
-; RV64MV-NEXT: add a4, a4, a5
-; RV64MV-NEXT: lui a5, %hi(.LCPI3_2)
-; RV64MV-NEXT: ld a5, %lo(.LCPI3_2)(a5)
-; RV64MV-NEXT: add a3, a3, a4
-; RV64MV-NEXT: slli a4, a4, 3
-; RV64MV-NEXT: sub a3, a3, a4
-; RV64MV-NEXT: mulh a4, a1, a5
-; RV64MV-NEXT: srli a5, a4, 63
-; RV64MV-NEXT: add a4, a4, a5
-; RV64MV-NEXT: li a5, 6
-; RV64MV-NEXT: mul a4, a4, a5
-; RV64MV-NEXT: sub a1, a1, a4
; RV64MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64MV-NEXT: vslide1down.vx v8, v8, a1
-; RV64MV-NEXT: vslide1down.vx v8, v8, a3
-; RV64MV-NEXT: vslide1down.vx v8, v8, a2
-; RV64MV-NEXT: vslidedown.vi v8, v8, 1
-; RV64MV-NEXT: li a1, -1
-; RV64MV-NEXT: srli a1, a1, 31
-; RV64MV-NEXT: vand.vx v8, v8, a1
-; RV64MV-NEXT: lui a2, 32
-; RV64MV-NEXT: addiw a2, a2, 256
-; RV64MV-NEXT: vmv.s.x v10, a2
-; RV64MV-NEXT: vsext.vf8 v12, v10
-; RV64MV-NEXT: vmsne.vv v0, v8, v12
-; RV64MV-NEXT: vmv.v.i v8, 0
-; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0
+; RV64MV-NEXT: vmv.v.i v8, -1
; RV64MV-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64MV-NEXT: vslidedown.vi v10, v8, 2
-; RV64MV-NEXT: vmv.x.s a2, v10
-; RV64MV-NEXT: slli a3, a2, 31
-; RV64MV-NEXT: srli a3, a3, 61
-; RV64MV-NEXT: sb a3, 12(a0)
-; RV64MV-NEXT: vmv.x.s a3, v8
-; RV64MV-NEXT: and a1, a3, a1
+; RV64MV-NEXT: vmv.x.s a1, v10
+; RV64MV-NEXT: slli a2, a1, 31
+; RV64MV-NEXT: srli a2, a2, 61
+; RV64MV-NEXT: sb a2, 12(a0)
; RV64MV-NEXT: vsetivli zero, 1, e64, m1, ta, ma
; RV64MV-NEXT: vslidedown.vi v8, v8, 1
-; RV64MV-NEXT: vmv.x.s a3, v8
-; RV64MV-NEXT: slli a4, a3, 33
-; RV64MV-NEXT: or a1, a1, a4
-; RV64MV-NEXT: sd a1, 0(a0)
-; RV64MV-NEXT: slli a2, a2, 2
-; RV64MV-NEXT: slli a3, a3, 31
-; RV64MV-NEXT: srli a3, a3, 62
-; RV64MV-NEXT: or a2, a3, a2
-; RV64MV-NEXT: sw a2, 8(a0)
+; RV64MV-NEXT: vmv.x.s a2, v8
+; RV64MV-NEXT: slli a3, a2, 33
+; RV64MV-NEXT: sd a3, 0(a0)
+; RV64MV-NEXT: slli a1, a1, 2
+; RV64MV-NEXT: slli a2, a2, 31
+; RV64MV-NEXT: srli a2, a2, 62
+; RV64MV-NEXT: or a1, a2, a1
+; RV64MV-NEXT: sw a1, 8(a0)
; RV64MV-NEXT: ret
%ld = load <3 x i33>, ptr %X
%srem = srem <3 x i33> %ld, <i33 6, i33 7, i33 -5>
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index f24b6107f15ada7..456d98fd4e47ffe 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -521,35 +521,33 @@ define void @test_urem_vec(ptr %X) nounwind {
;
; RV32MV-LABEL: test_urem_vec:
; RV32MV: # %bb.0:
-; RV32MV-NEXT: lw a1, 0(a0)
-; RV32MV-NEXT: andi a2, a1, 2047
+; RV32MV-NEXT: lbu a1, 4(a0)
+; RV32MV-NEXT: lw a2, 0(a0)
+; RV32MV-NEXT: slli a1, a1, 10
+; RV32MV-NEXT: srli a3, a2, 22
+; RV32MV-NEXT: or a1, a3, a1
; RV32MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
; RV32MV-NEXT: vslide1down.vx v8, v8, a2
-; RV32MV-NEXT: lbu a2, 4(a0)
-; RV32MV-NEXT: slli a3, a1, 10
-; RV32MV-NEXT: srli a3, a3, 21
-; RV32MV-NEXT: vslide1down.vx v8, v8, a3
-; RV32MV-NEXT: slli a2, a2, 10
-; RV32MV-NEXT: srli a1, a1, 22
-; RV32MV-NEXT: or a1, a1, a2
-; RV32MV-NEXT: andi a1, a1, 2047
+; RV32MV-NEXT: srli a2, a2, 11
+; RV32MV-NEXT: vslide1down.vx v8, v8, a2
; RV32MV-NEXT: vslide1down.vx v8, v8, a1
-; RV32MV-NEXT: lui a1, %hi(.LCPI4_0)
-; RV32MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
-; RV32MV-NEXT: vle16.v v9, (a1)
; RV32MV-NEXT: vslidedown.vi v8, v8, 1
+; RV32MV-NEXT: li a1, 2047
+; RV32MV-NEXT: lui a2, %hi(.LCPI4_0)
+; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_0)
+; RV32MV-NEXT: vle16.v v9, (a2)
+; RV32MV-NEXT: vand.vx v8, v8, a1
; RV32MV-NEXT: vid.v v10
; RV32MV-NEXT: vsub.vv v8, v8, v10
; RV32MV-NEXT: vmul.vv v8, v8, v9
; RV32MV-NEXT: vadd.vv v9, v8, v8
-; RV32MV-NEXT: lui a1, 41121
-; RV32MV-NEXT: addi a1, a1, -1527
+; RV32MV-NEXT: lui a2, 41121
+; RV32MV-NEXT: addi a2, a2, -1527
; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV32MV-NEXT: vmv.s.x v10, a1
+; RV32MV-NEXT: vmv.s.x v10, a2
; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV32MV-NEXT: vsext.vf2 v11, v10
; RV32MV-NEXT: vsll.vv v9, v9, v11
-; RV32MV-NEXT: li a1, 2047
; RV32MV-NEXT: vand.vx v8, v8, a1
; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV32MV-NEXT: vmv.v.i v10, 1
@@ -587,30 +585,31 @@ define void @test_urem_vec(ptr %X) nounwind {
; RV64MV-NEXT: lwu a2, 0(a0)
; RV64MV-NEXT: slli a1, a1, 32
; RV64MV-NEXT: or a1, a2, a1
-; RV64MV-NEXT: andi a2, a1, 2047
; RV64MV-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64MV-NEXT: vslide1down.vx v8, v8, a2
-; RV64MV-NEXT: slli a2, a1, 42
-; RV64MV-NEXT: srli a2, a2, 53
+; RV64MV-NEXT: vslide1down.vx v8, v8, a1
+; RV64MV-NEXT: slli a1, a1, 24
+; RV64MV-NEXT: srli a1, a1, 24
+; RV64MV-NEXT: srli a2, a1, 11
; RV64MV-NEXT: vslide1down.vx v8, v8, a2
; RV64MV-NEXT: srli a1, a1, 22
; RV64MV-NEXT: vslide1down.vx v8, v8, a1
-; RV64MV-NEXT: lui a1, %hi(.LCPI4_0)
-; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_0)
-; RV64MV-NEXT: vle16.v v9, (a1)
; RV64MV-NEXT: vslidedown.vi v8, v8, 1
+; RV64MV-NEXT: li a1, 2047
+; RV64MV-NEXT: lui a2, %hi(.LCPI4_0)
+; RV64MV-NEXT: addi a2, a2, %lo(.LCPI4_0)
+; RV64MV-NEXT: vle16.v v9, (a2)
+; RV64MV-NEXT: vand.vx v8, v8, a1
; RV64MV-NEXT: vid.v v10
; RV64MV-NEXT: vsub.vv v8, v8, v10
; RV64MV-NEXT: vmul.vv v8, v8, v9
; RV64MV-NEXT: vadd.vv v9, v8, v8
-; RV64MV-NEXT: lui a1, 41121
-; RV64MV-NEXT: addiw a1, a1, -1527
+; RV64MV-NEXT: lui a2, 41121
+; RV64MV-NEXT: addiw a2, a2, -1527
; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; RV64MV-NEXT: vmv.s.x v10, a1
+; RV64MV-NEXT: vmv.s.x v10, a2
; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64MV-NEXT: vsext.vf2 v11, v10
; RV64MV-NEXT: vsll.vv v9, v9, v11
-; RV64MV-NEXT: li a1, 2047
; RV64MV-NEXT: vand.vx v8, v8, a1
; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma
; RV64MV-NEXT: vmv.v.i v10, 1
>From b0f82fb335ed4c32b839ef9459a6c43e63c2115b Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail at philipreames.com>
Date: Mon, 25 Sep 2023 15:11:31 -0700
Subject: [PATCH 2/4] Update llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Co-authored-by: Luke Lau <luke_lau at icloud.com>
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c40e940f1b8aeae..c5e46f076225d4b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13338,7 +13338,7 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
}
/// IF we have a build_vector where each lanes is binop X, C, where C
-/// is a constant (nut not neccessarily the same constant on all lanes),
+/// is a constant (but not necessarily the same constant on all lanes),
/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
/// We assume that materializing a constant build vector will be no more
/// expensive that performing O(n) binops.
>From a4b9f8e8c08b5101555376d61e354f091a9488e7 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail at philipreames.com>
Date: Mon, 25 Sep 2023 15:11:42 -0700
Subject: [PATCH 3/4] Update llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Co-authored-by: Luke Lau <luke_lau at icloud.com>
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c5e46f076225d4b..a0f1e33bc66625b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13337,7 +13337,7 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
}
-/// IF we have a build_vector where each lanes is binop X, C, where C
+/// IF we have a build_vector where each lane is binop X, C, where C
/// is a constant (but not necessarily the same constant on all lanes),
/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
/// We assume that materializing a constant build vector will be no more
>From 12c98b25d5aaf5c5f6e0c17ffa56b8521b6689cf Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 26 Sep 2023 07:47:10 -0700
Subject: [PATCH 4/4] clang-format fixes
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 ++++------
1 file changed, 4 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a0f1e33bc66625b..c4942f9c637bd8d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13343,8 +13343,8 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
/// We assume that materializing a constant build vector will be no more
/// expensive that performing O(n) binops.
static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget,
- const RISCVTargetLowering &TLI) {
+ const RISCVSubtarget &Subtarget,
+ const RISCVTargetLowering &TLI) {
SDLoc DL(N);
EVT VT = N->getValueType(0);
@@ -13357,8 +13357,7 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
if (!TLI.isBinOp(Opcode))
return SDValue();
- if (!TLI.isOperationLegalOrCustom(Opcode, VT) ||
- !TLI.isTypeLegal(VT))
+ if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
return SDValue();
SmallVector<SDValue> LHSOps;
@@ -13383,8 +13382,7 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
RHSOps.push_back(Op.getOperand(1));
}
- return DAG.getNode(Opcode, DL, VT,
- DAG.getBuildVector(VT, DL, LHSOps),
+ return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
DAG.getBuildVector(VT, DL, RHSOps));
}