[llvm] [RISCV] Transform build_vector((binop X_i, C_i)..) to binop (build_ve… (PR #67358)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Tue Sep 26 07:52:01 PDT 2023


https://github.com/preames updated https://github.com/llvm/llvm-project/pull/67358

>From 57bbd485e77809cd1a52a3b567d0d553be2695ae Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Wed, 6 Sep 2023 16:30:58 -0700
Subject: [PATCH 1/4] [RISCV] Transform build_vector((binop X_i, C_i)..) to
 binop (build_vector, build_vector)

If we have a build_vector of identical binops, we'd prefer to have a single vector binop in most cases.  We do need to make sure that the two build_vectors aren't more difficult to materialize than the original build_vector.  To start with, let's restrict ourselves to the case where one build_vector is a fully constant vector.

Note that we don't need to worry about speculation safety here.  We are not speculating any of the lanes, so none of the usual concerns (e.g. division by zero) apply.

I'll highlight that the constant build_vector heuristic is just one we could choose here.  We just need some way to be reasonably sure that the cost of the two build_vectors isn't going to completely outweigh the savings from forming the binop.  I'm open to alternate heuristics here - both more restrictive and more permissive.

As noted in the comments, we can extend this in a number of ways.  I decided to start small since a) that helps keep things understandable in review and b) it covers my actual motivating case.
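
For illustration, here is a minimal IR sketch of the kind of input the combine targets (the function name is illustrative; it mirrors the add_constant_rhs tests updated below).  Each lane is an add of a variable and a constant, so after this change the lanes are gathered into one build_vector and a single vector add against a constant-pool vector is emitted instead of four scalar addi/addiw instructions:

  ; Four scalar adds with (differing) constant right-hand sides feeding a
  ; build_vector; the combine rewrites this as
  ;   add (build_vector %a, %b, %c, %d), (build_vector 23, 25, 1, 2355)
  define <4 x i32> @buildvec_of_add(i32 %a, i32 %b, i32 %c, i32 %d) {
    %e0 = add i32 %a, 23
    %e1 = add i32 %b, 25
    %e2 = add i32 %c, 1
    %e3 = add i32 %d, 2355
    %v0 = insertelement <4 x i32> poison, i32 %e0, i32 0
    %v1 = insertelement <4 x i32> %v0, i32 %e1, i32 1
    %v2 = insertelement <4 x i32> %v1, i32 %e2, i32 2
    %v3 = insertelement <4 x i32> %v2, i32 %e3, i32 3
    ret <4 x i32> %v3
  }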
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  57 +-
 .../rvv/fixed-vectors-buildvec-of-binop.ll    | 411 +++++---------
 .../RISCV/rvv/fixed-vectors-vselect.ll        | 513 ++++++------------
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll |  81 +--
 .../CodeGen/RISCV/urem-seteq-illegal-types.ll |  55 +-
 5 files changed, 369 insertions(+), 748 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 686350de29883aa..c40e940f1b8aeae 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -1295,7 +1295,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setTargetDAGCombine({ISD::FCOPYSIGN, ISD::MGATHER, ISD::MSCATTER,
                          ISD::VP_GATHER, ISD::VP_SCATTER, ISD::SRA, ISD::SRL,
                          ISD::SHL, ISD::STORE, ISD::SPLAT_VECTOR,
-                         ISD::CONCAT_VECTORS});
+                         ISD::BUILD_VECTOR, ISD::CONCAT_VECTORS});
   if (Subtarget.hasVendorXTHeadMemPair())
     setTargetDAGCombine({ISD::LOAD, ISD::STORE});
   if (Subtarget.useRVVForFixedLengthVectors())
@@ -13337,6 +13337,57 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
   return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
 }
 
+/// IF we have a build_vector where each lanes is binop X, C, where C
+/// is a constant (nut not neccessarily the same constant on all lanes),
+/// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
+/// We assume that materializing a constant build vector will be no more
+/// expensive that performing O(n) binops.
+static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
+                                            const RISCVSubtarget &Subtarget,
+                                            const RISCVTargetLowering &TLI) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  assert(!VT.isScalableVector() && "unexpected build vector");
+
+  if (VT.getVectorNumElements() == 1)
+    return SDValue();
+
+  const unsigned Opcode = N->op_begin()->getNode()->getOpcode();
+  if (!TLI.isBinOp(Opcode))
+    return SDValue();
+
+  if (!TLI.isOperationLegalOrCustom(Opcode, VT) ||
+      !TLI.isTypeLegal(VT))
+    return SDValue();
+
+  SmallVector<SDValue> LHSOps;
+  SmallVector<SDValue> RHSOps;
+  for (SDValue Op : N->ops()) {
+    if (Op.isUndef()) {
+      LHSOps.push_back(Op);
+      RHSOps.push_back(Op);
+      continue;
+    }
+
+    // TODO: We can handle operations which have an neutral rhs value
+    // (e.g. x + 0, a * 1 or a << 0), but we then have to keep track
+    // of profit in a more explicit manner.
+    if (Op.getOpcode() != Opcode || !Op.hasOneUse())
+      return SDValue();
+
+    LHSOps.push_back(Op.getOperand(0));
+    if (!isa<ConstantSDNode>(Op.getOperand(1)) &&
+        !isa<ConstantFPSDNode>(Op.getOperand(1)))
+      return SDValue();
+    RHSOps.push_back(Op.getOperand(1));
+  }
+
+  return DAG.getNode(Opcode, DL, VT,
+                     DAG.getBuildVector(VT, DL, LHSOps),
+                     DAG.getBuildVector(VT, DL, RHSOps));
+}
+
 // If we're concatenating a series of vector loads like
 // concat_vectors (load v4i8, p+0), (load v4i8, p+n), (load v4i8, p+n*2) ...
 // Then we can turn this into a strided load by widening the vector elements
@@ -14399,6 +14450,10 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
       return Gather;
     break;
   }
+  case ISD::BUILD_VECTOR:
+    if (SDValue V = performBUILD_VECTORCombine(N, DAG, Subtarget, *this))
+      return V;
+    break;
   case ISD::CONCAT_VECTORS:
     if (SDValue V = performCONCAT_VECTORSCombine(N, DAG, Subtarget, *this))
       return V;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
index 37a43c3550a5282..9e4584eb17ff9a7 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll
@@ -3,33 +3,18 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+d -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
 
 define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a0, a0, 23
-; RV32-NEXT:    addi a1, a1, 25
-; RV32-NEXT:    addi a2, a2, 1
-; RV32-NEXT:    addi a3, a3, 2047
-; RV32-NEXT:    addi a3, a3, 308
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: add_constant_rhs:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, 23
-; RV64-NEXT:    addiw a1, a1, 25
-; RV64-NEXT:    addiw a2, a2, 1
-; RV64-NEXT:    addiw a3, a3, 2047
-; RV64-NEXT:    addiw a3, a3, 308
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    ret
+; CHECK-LABEL: add_constant_rhs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, %hi(.LCPI0_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI0_0)
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %e0 = add i32 %a, 23
   %e1 = add i32 %b, 25
   %e2 = add i32 %c, 1
@@ -42,49 +27,22 @@ define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
 }
 
 define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
-; RV32-LABEL: add_constant_rhs_8xi32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a0, a0, 23
-; RV32-NEXT:    addi a1, a1, 25
-; RV32-NEXT:    addi a2, a2, 1
-; RV32-NEXT:    addi a3, a3, 2047
-; RV32-NEXT:    addi a3, a3, 308
-; RV32-NEXT:    addi a4, a4, 23
-; RV32-NEXT:    addi a5, a5, 23
-; RV32-NEXT:    addi a6, a6, 22
-; RV32-NEXT:    addi a7, a7, 23
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    vslide1down.vx v8, v8, a4
-; RV32-NEXT:    vslide1down.vx v8, v8, a5
-; RV32-NEXT:    vslide1down.vx v8, v8, a6
-; RV32-NEXT:    vslide1down.vx v8, v8, a7
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: add_constant_rhs_8xi32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, 23
-; RV64-NEXT:    addiw a1, a1, 25
-; RV64-NEXT:    addiw a2, a2, 1
-; RV64-NEXT:    addiw a3, a3, 2047
-; RV64-NEXT:    addiw a3, a3, 308
-; RV64-NEXT:    addiw a4, a4, 23
-; RV64-NEXT:    addiw a5, a5, 23
-; RV64-NEXT:    addiw a6, a6, 22
-; RV64-NEXT:    addiw a7, a7, 23
-; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    vslide1down.vx v8, v8, a4
-; RV64-NEXT:    vslide1down.vx v8, v8, a5
-; RV64-NEXT:    vslide1down.vx v8, v8, a6
-; RV64-NEXT:    vslide1down.vx v8, v8, a7
-; RV64-NEXT:    ret
+; CHECK-LABEL: add_constant_rhs_8xi32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vslide1down.vx v8, v8, a4
+; CHECK-NEXT:    lui a0, %hi(.LCPI1_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI1_0)
+; CHECK-NEXT:    vle32.v v10, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a5
+; CHECK-NEXT:    vslide1down.vx v8, v8, a6
+; CHECK-NEXT:    vslide1down.vx v8, v8, a7
+; CHECK-NEXT:    vadd.vv v8, v8, v10
+; CHECK-NEXT:    ret
   %e0 = add i32 %a, 23
   %e1 = add i32 %b, 25
   %e2 = add i32 %c, 1
@@ -106,33 +64,18 @@ define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e,
 
 
 define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: sub_constant_rhs:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a0, a0, -23
-; RV32-NEXT:    addi a1, a1, -25
-; RV32-NEXT:    addi a2, a2, -1
-; RV32-NEXT:    addi a3, a3, -2048
-; RV32-NEXT:    addi a3, a3, -307
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: sub_constant_rhs:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, -23
-; RV64-NEXT:    addiw a1, a1, -25
-; RV64-NEXT:    addiw a2, a2, -1
-; RV64-NEXT:    addiw a3, a3, -2048
-; RV64-NEXT:    addiw a3, a3, -307
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    ret
+; CHECK-LABEL: sub_constant_rhs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, %hi(.LCPI2_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI2_0)
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vsub.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %e0 = sub i32 %a, 23
   %e1 = sub i32 %b, 25
   %e2 = sub i32 %c, 1
@@ -145,41 +88,18 @@ define <4 x i32> @sub_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
 }
 
 define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: mul_constant_rhs:
-; RV32:       # %bb.0:
-; RV32-NEXT:    li a4, 23
-; RV32-NEXT:    mul a0, a0, a4
-; RV32-NEXT:    li a4, 25
-; RV32-NEXT:    mul a1, a1, a4
-; RV32-NEXT:    li a4, 27
-; RV32-NEXT:    mul a2, a2, a4
-; RV32-NEXT:    lui a4, 1
-; RV32-NEXT:    addi a4, a4, -1741
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: mul_constant_rhs:
-; RV64:       # %bb.0:
-; RV64-NEXT:    li a4, 23
-; RV64-NEXT:    mulw a0, a0, a4
-; RV64-NEXT:    li a4, 25
-; RV64-NEXT:    mulw a1, a1, a4
-; RV64-NEXT:    li a4, 27
-; RV64-NEXT:    mulw a2, a2, a4
-; RV64-NEXT:    lui a4, 1
-; RV64-NEXT:    addiw a4, a4, -1741
-; RV64-NEXT:    mulw a3, a3, a4
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    ret
+; CHECK-LABEL: mul_constant_rhs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, %hi(.LCPI3_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI3_0)
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vmul.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %e0 = mul i32 %a, 23
   %e1 = mul i32 %b, 25
   %e2 = mul i32 %c, 27
@@ -192,60 +112,30 @@ define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
 }
 
 define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: udiv_constant_rhs:
-; RV32:       # %bb.0:
-; RV32-NEXT:    lui a4, 729444
-; RV32-NEXT:    addi a4, a4, 713
-; RV32-NEXT:    mulhu a0, a0, a4
-; RV32-NEXT:    srli a0, a0, 4
-; RV32-NEXT:    lui a4, 335544
-; RV32-NEXT:    addi a4, a4, 1311
-; RV32-NEXT:    mulhu a1, a1, a4
-; RV32-NEXT:    srli a1, a1, 3
-; RV32-NEXT:    lui a4, 93703
-; RV32-NEXT:    addi a4, a4, -1899
-; RV32-NEXT:    mulhu a4, a3, a4
-; RV32-NEXT:    sub a3, a3, a4
-; RV32-NEXT:    srli a3, a3, 1
-; RV32-NEXT:    add a3, a3, a4
-; RV32-NEXT:    srli a3, a3, 7
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: udiv_constant_rhs:
-; RV64:       # %bb.0:
-; RV64-NEXT:    slli a0, a0, 32
-; RV64-NEXT:    lui a4, 729444
-; RV64-NEXT:    addiw a4, a4, 713
-; RV64-NEXT:    slli a4, a4, 32
-; RV64-NEXT:    mulhu a0, a0, a4
-; RV64-NEXT:    srli a0, a0, 36
-; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    lui a4, 335544
-; RV64-NEXT:    addiw a4, a4, 1311
-; RV64-NEXT:    slli a4, a4, 32
-; RV64-NEXT:    mulhu a1, a1, a4
-; RV64-NEXT:    srli a1, a1, 35
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    lui a5, 93703
-; RV64-NEXT:    addiw a5, a5, -1899
-; RV64-NEXT:    slli a5, a5, 32
-; RV64-NEXT:    mulhu a4, a4, a5
-; RV64-NEXT:    srli a4, a4, 32
-; RV64-NEXT:    subw a3, a3, a4
-; RV64-NEXT:    srliw a3, a3, 1
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    srli a3, a3, 7
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    ret
+; CHECK-LABEL: udiv_constant_rhs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_0)
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vmulhu.vv v9, v8, v9
+; CHECK-NEXT:    vsub.vv v10, v8, v9
+; CHECK-NEXT:    vmv.v.i v11, 0
+; CHECK-NEXT:    lui a0, 524288
+; CHECK-NEXT:    vslide1down.vx v11, v11, a0
+; CHECK-NEXT:    lui a0, %hi(.LCPI4_1)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_1)
+; CHECK-NEXT:    vle32.v v12, (a0)
+; CHECK-NEXT:    vmulhu.vv v10, v10, v11
+; CHECK-NEXT:    vadd.vv v9, v10, v9
+; CHECK-NEXT:    vmv.v.i v0, 4
+; CHECK-NEXT:    vsrl.vv v9, v9, v12
+; CHECK-NEXT:    vmerge.vvm v8, v9, v8, v0
+; CHECK-NEXT:    ret
   %e0 = udiv i32 %a, 23
   %e1 = udiv i32 %b, 25
   %e2 = udiv i32 %c, 1
@@ -261,21 +151,15 @@ define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) {
 define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
 ; CHECK-LABEL: fadd_constant_rhs:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 269184
-; CHECK-NEXT:    fmv.w.x fa5, a0
-; CHECK-NEXT:    fadd.s fa4, fa0, fa5
-; CHECK-NEXT:    lui a0, 269440
-; CHECK-NEXT:    fmv.w.x fa0, a0
-; CHECK-NEXT:    fadd.s fa1, fa1, fa0
-; CHECK-NEXT:    lui a0, 262144
-; CHECK-NEXT:    fmv.w.x fa0, a0
-; CHECK-NEXT:    fadd.s fa2, fa2, fa0
-; CHECK-NEXT:    fadd.s fa5, fa3, fa5
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
+; CHECK-NEXT:    vle32.v v9, (a0)
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfadd.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %e0 = fadd float %a, 23.0
   %e1 = fadd float %b, 25.0
@@ -291,21 +175,15 @@ define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) {
 define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
 ; CHECK-LABEL: fdiv_constant_rhs:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 269184
-; CHECK-NEXT:    fmv.w.x fa5, a0
-; CHECK-NEXT:    fdiv.s fa4, fa0, fa5
-; CHECK-NEXT:    lui a0, 269440
-; CHECK-NEXT:    fmv.w.x fa0, a0
-; CHECK-NEXT:    fdiv.s fa1, fa1, fa0
-; CHECK-NEXT:    lui a0, 266752
-; CHECK-NEXT:    fmv.w.x fa0, a0
-; CHECK-NEXT:    fdiv.s fa2, fa2, fa0
-; CHECK-NEXT:    fdiv.s fa5, fa3, fa5
 ; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa4
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa0
+; CHECK-NEXT:    lui a0, %hi(.LCPI6_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI6_0)
+; CHECK-NEXT:    vle32.v v9, (a0)
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa1
 ; CHECK-NEXT:    vfslide1down.vf v8, v8, fa2
-; CHECK-NEXT:    vfslide1down.vf v8, v8, fa5
+; CHECK-NEXT:    vfslide1down.vf v8, v8, fa3
+; CHECK-NEXT:    vfdiv.vv v8, v8, v9
 ; CHECK-NEXT:    ret
   %e0 = fdiv float %a, 23.0
   %e1 = fdiv float %b, 25.0
@@ -319,31 +197,16 @@ define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) {
 }
 
 define <4 x i32> @add_constant_rhs_splat(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs_splat:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a0, a0, 23
-; RV32-NEXT:    addi a1, a1, 23
-; RV32-NEXT:    addi a2, a2, 23
-; RV32-NEXT:    addi a3, a3, 23
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: add_constant_rhs_splat:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, 23
-; RV64-NEXT:    addiw a1, a1, 23
-; RV64-NEXT:    addiw a2, a2, 23
-; RV64-NEXT:    addiw a3, a3, 23
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    ret
+; CHECK-LABEL: add_constant_rhs_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    li a0, 23
+; CHECK-NEXT:    vadd.vx v8, v8, a0
+; CHECK-NEXT:    ret
   %e0 = add i32 %a, 23
   %e1 = add i32 %b, 23
   %e2 = add i32 %c, 23
@@ -458,33 +321,18 @@ define <4 x i32> @add_constant_rhs_identity2(i32 %a, i32 %b, i32 %c, i32 %d) {
 }
 
 define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs_inverse:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a0, a0, -1
-; RV32-NEXT:    addi a1, a1, 25
-; RV32-NEXT:    addi a2, a2, 1
-; RV32-NEXT:    addi a3, a3, 2047
-; RV32-NEXT:    addi a3, a3, 308
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: add_constant_rhs_inverse:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, -1
-; RV64-NEXT:    addiw a1, a1, 25
-; RV64-NEXT:    addiw a2, a2, 1
-; RV64-NEXT:    addiw a3, a3, 2047
-; RV64-NEXT:    addiw a3, a3, 308
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    ret
+; CHECK-LABEL: add_constant_rhs_inverse:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, %hi(.LCPI11_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI11_0)
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %e0 = sub i32 %a, 1
   %e1 = add i32 %b, 25
   %e2 = add i32 %c, 1
@@ -497,33 +345,18 @@ define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) {
 }
 
 define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) {
-; RV32-LABEL: add_constant_rhs_commute:
-; RV32:       # %bb.0:
-; RV32-NEXT:    addi a0, a0, 23
-; RV32-NEXT:    addi a1, a1, 25
-; RV32-NEXT:    addi a2, a2, 1
-; RV32-NEXT:    addi a3, a3, 2047
-; RV32-NEXT:    addi a3, a3, 308
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vslide1down.vx v8, v8, a0
-; RV32-NEXT:    vslide1down.vx v8, v8, a1
-; RV32-NEXT:    vslide1down.vx v8, v8, a2
-; RV32-NEXT:    vslide1down.vx v8, v8, a3
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: add_constant_rhs_commute:
-; RV64:       # %bb.0:
-; RV64-NEXT:    addiw a0, a0, 23
-; RV64-NEXT:    addiw a1, a1, 25
-; RV64-NEXT:    addiw a2, a2, 1
-; RV64-NEXT:    addiw a3, a3, 2047
-; RV64-NEXT:    addiw a3, a3, 308
-; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT:    vslide1down.vx v8, v8, a0
-; RV64-NEXT:    vslide1down.vx v8, v8, a1
-; RV64-NEXT:    vslide1down.vx v8, v8, a2
-; RV64-NEXT:    vslide1down.vx v8, v8, a3
-; RV64-NEXT:    ret
+; CHECK-LABEL: add_constant_rhs_commute:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT:    vslide1down.vx v8, v8, a0
+; CHECK-NEXT:    lui a0, %hi(.LCPI12_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI12_0)
+; CHECK-NEXT:    vle32.v v9, (a0)
+; CHECK-NEXT:    vslide1down.vx v8, v8, a1
+; CHECK-NEXT:    vslide1down.vx v8, v8, a2
+; CHECK-NEXT:    vslide1down.vx v8, v8, a3
+; CHECK-NEXT:    vadd.vv v8, v8, v9
+; CHECK-NEXT:    ret
   %e0 = add i32 %a, 23
   %e1 = add i32 %b, 25
   %e2 = add i32 1, %c
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index 820e875d3ee3bb0..d27e7799a38628a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -3,65 +3,30 @@
 ; RUN: llc -mtriple=riscv64 -target-abi=lp64d -mattr=+v,+zfh,+zvfh,+f,+d -verify-machineinstrs < %s | FileCheck %s -check-prefixes=CHECK,RV64
 
 define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vv_v6i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    lbu a2, 0(a2)
-; RV32-NEXT:    vle32.v v8, (a1)
-; RV32-NEXT:    andi a1, a2, 1
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v8, a1
-; RV32-NEXT:    slli a1, a2, 30
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 29
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 28
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 27
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    srli a2, a2, 5
-; RV32-NEXT:    vslide1down.vx v10, v10, a2
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
-; RV32-NEXT:    vand.vi v10, v10, 1
-; RV32-NEXT:    vmsne.vi v0, v10, 0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
-; RV32-NEXT:    vle32.v v8, (a0), v0.t
-; RV32-NEXT:    vse32.v v8, (a3)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vselect_vv_v6i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    lbu a2, 0(a2)
-; RV64-NEXT:    vle32.v v8, (a1)
-; RV64-NEXT:    andi a1, a2, 1
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v8, a1
-; RV64-NEXT:    slli a1, a2, 62
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 61
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 60
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 59
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    srli a2, a2, 5
-; RV64-NEXT:    vslide1down.vx v10, v10, a2
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
-; RV64-NEXT:    vand.vi v10, v10, 1
-; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
-; RV64-NEXT:    vle32.v v8, (a0), v0.t
-; RV64-NEXT:    vse32.v v8, (a3)
-; RV64-NEXT:    ret
+; CHECK-LABEL: vselect_vv_v6i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    lbu a2, 0(a2)
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vslide1down.vx v10, v8, a2
+; CHECK-NEXT:    srli a1, a2, 1
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a1, a2, 2
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a1, a2, 3
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a1, a2, 4
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a2, a2, 5
+; CHECK-NEXT:    vslide1down.vx v10, v10, a2
+; CHECK-NEXT:    vslidedown.vi v10, v10, 2
+; CHECK-NEXT:    vand.vi v10, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    vse32.v v8, (a3)
+; CHECK-NEXT:    ret
   %va = load <6 x i32>, ptr %a
   %vb = load <6 x i32>, ptr %b
   %vcc = load <6 x i1>, ptr %cc
@@ -71,67 +36,31 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 }
 
 define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vx_v6i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    lbu a2, 0(a2)
-; RV32-NEXT:    vle32.v v8, (a1)
-; RV32-NEXT:    andi a1, a2, 1
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v8, a1
-; RV32-NEXT:    slli a1, a2, 30
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 29
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 28
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 27
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    srli a2, a2, 5
-; RV32-NEXT:    vslide1down.vx v10, v10, a2
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
-; RV32-NEXT:    vand.vi v10, v10, 1
-; RV32-NEXT:    vmsne.vi v0, v10, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmerge.vxm v8, v8, a0, v0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    vse32.v v8, (a3)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vselect_vx_v6i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    lbu a2, 0(a2)
-; RV64-NEXT:    vle32.v v8, (a1)
-; RV64-NEXT:    andi a1, a2, 1
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v8, a1
-; RV64-NEXT:    slli a1, a2, 62
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 61
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 60
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 59
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    srli a2, a2, 5
-; RV64-NEXT:    vslide1down.vx v10, v10, a2
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
-; RV64-NEXT:    vand.vi v10, v10, 1
-; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmerge.vxm v8, v8, a0, v0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    vse32.v v8, (a3)
-; RV64-NEXT:    ret
+; CHECK-LABEL: vselect_vx_v6i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    lbu a2, 0(a2)
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vslide1down.vx v10, v8, a2
+; CHECK-NEXT:    srli a1, a2, 1
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a1, a2, 2
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a1, a2, 3
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a1, a2, 4
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a2, a2, 5
+; CHECK-NEXT:    vslide1down.vx v10, v10, a2
+; CHECK-NEXT:    vslidedown.vi v10, v10, 2
+; CHECK-NEXT:    vand.vi v10, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmerge.vxm v8, v8, a0, v0
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a3)
+; CHECK-NEXT:    ret
   %vb = load <6 x i32>, ptr %b
   %ahead = insertelement <6 x i32> poison, i32 %a, i32 0
   %va = shufflevector <6 x i32> %ahead, <6 x i32> poison, <6 x i32> zeroinitializer
@@ -142,67 +71,31 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
 }
 
 define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vi_v6i32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    lbu a1, 0(a1)
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    andi a0, a1, 1
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v8, a0
-; RV32-NEXT:    slli a0, a1, 30
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    slli a0, a1, 29
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    slli a0, a1, 28
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    slli a0, a1, 27
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    srli a1, a1, 5
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
-; RV32-NEXT:    vand.vi v10, v10, 1
-; RV32-NEXT:    vmsne.vi v0, v10, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    vse32.v v8, (a2)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vselect_vi_v6i32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    lbu a1, 0(a1)
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    andi a0, a1, 1
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v8, a0
-; RV64-NEXT:    slli a0, a1, 62
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    slli a0, a1, 61
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    slli a0, a1, 60
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    slli a0, a1, 59
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    srli a1, a1, 5
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
-; RV64-NEXT:    vand.vi v10, v10, 1
-; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmerge.vim v8, v8, -1, v0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    vse32.v v8, (a2)
-; RV64-NEXT:    ret
+; CHECK-LABEL: vselect_vi_v6i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    lbu a1, 0(a1)
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vslide1down.vx v10, v8, a1
+; CHECK-NEXT:    srli a0, a1, 1
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a0, a1, 2
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a0, a1, 3
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a0, a1, 4
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a1, a1, 5
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    vslidedown.vi v10, v10, 2
+; CHECK-NEXT:    vand.vi v10, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmerge.vim v8, v8, -1, v0
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a2)
+; CHECK-NEXT:    ret
   %vb = load <6 x i32>, ptr %b
   %a = insertelement <6 x i32> poison, i32 -1, i32 0
   %va = shufflevector <6 x i32> %a, <6 x i32> poison, <6 x i32> zeroinitializer
@@ -214,65 +107,30 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
 
 
 define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vv_v6f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    lbu a2, 0(a2)
-; RV32-NEXT:    vle32.v v8, (a1)
-; RV32-NEXT:    andi a1, a2, 1
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v8, a1
-; RV32-NEXT:    slli a1, a2, 30
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 29
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 28
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    slli a1, a2, 27
-; RV32-NEXT:    srli a1, a1, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    srli a2, a2, 5
-; RV32-NEXT:    vslide1down.vx v10, v10, a2
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
-; RV32-NEXT:    vand.vi v10, v10, 1
-; RV32-NEXT:    vmsne.vi v0, v10, 0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
-; RV32-NEXT:    vle32.v v8, (a0), v0.t
-; RV32-NEXT:    vse32.v v8, (a3)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vselect_vv_v6f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    lbu a2, 0(a2)
-; RV64-NEXT:    vle32.v v8, (a1)
-; RV64-NEXT:    andi a1, a2, 1
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v8, a1
-; RV64-NEXT:    slli a1, a2, 62
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 61
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 60
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    slli a1, a2, 59
-; RV64-NEXT:    srli a1, a1, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    srli a2, a2, 5
-; RV64-NEXT:    vslide1down.vx v10, v10, a2
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
-; RV64-NEXT:    vand.vi v10, v10, 1
-; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
-; RV64-NEXT:    vle32.v v8, (a0), v0.t
-; RV64-NEXT:    vse32.v v8, (a3)
-; RV64-NEXT:    ret
+; CHECK-LABEL: vselect_vv_v6f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    lbu a2, 0(a2)
+; CHECK-NEXT:    vle32.v v8, (a1)
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vslide1down.vx v10, v8, a2
+; CHECK-NEXT:    srli a1, a2, 1
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a1, a2, 2
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a1, a2, 3
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a1, a2, 4
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    srli a2, a2, 5
+; CHECK-NEXT:    vslide1down.vx v10, v10, a2
+; CHECK-NEXT:    vslidedown.vi v10, v10, 2
+; CHECK-NEXT:    vand.vi v10, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, tu, mu
+; CHECK-NEXT:    vle32.v v8, (a0), v0.t
+; CHECK-NEXT:    vse32.v v8, (a3)
+; CHECK-NEXT:    ret
   %va = load <6 x float>, ptr %a
   %vb = load <6 x float>, ptr %b
   %vcc = load <6 x i1>, ptr %cc
@@ -282,67 +140,31 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
 }
 
 define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vx_v6f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    lbu a1, 0(a1)
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    andi a0, a1, 1
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v8, a0
-; RV32-NEXT:    slli a0, a1, 30
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    slli a0, a1, 29
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    slli a0, a1, 28
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    slli a0, a1, 27
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    srli a1, a1, 5
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
-; RV32-NEXT:    vand.vi v10, v10, 1
-; RV32-NEXT:    vmsne.vi v0, v10, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    vse32.v v8, (a2)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vselect_vx_v6f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    lbu a1, 0(a1)
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    andi a0, a1, 1
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v8, a0
-; RV64-NEXT:    slli a0, a1, 62
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    slli a0, a1, 61
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    slli a0, a1, 60
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    slli a0, a1, 59
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    srli a1, a1, 5
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
-; RV64-NEXT:    vand.vi v10, v10, 1
-; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vfmerge.vfm v8, v8, fa0, v0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    vse32.v v8, (a2)
-; RV64-NEXT:    ret
+; CHECK-LABEL: vselect_vx_v6f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    lbu a1, 0(a1)
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vslide1down.vx v10, v8, a1
+; CHECK-NEXT:    srli a0, a1, 1
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a0, a1, 2
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a0, a1, 3
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a0, a1, 4
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a1, a1, 5
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    vslidedown.vi v10, v10, 2
+; CHECK-NEXT:    vand.vi v10, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfmerge.vfm v8, v8, fa0, v0
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a2)
+; CHECK-NEXT:    ret
   %vb = load <6 x float>, ptr %b
   %ahead = insertelement <6 x float> poison, float %a, i32 0
   %va = shufflevector <6 x float> %ahead, <6 x float> poison, <6 x i32> zeroinitializer
@@ -353,67 +175,31 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
 }
 
 define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
-; RV32-LABEL: vselect_vfpzero_v6f32:
-; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    lbu a1, 0(a1)
-; RV32-NEXT:    vle32.v v8, (a0)
-; RV32-NEXT:    andi a0, a1, 1
-; RV32-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT:    vslide1down.vx v10, v8, a0
-; RV32-NEXT:    slli a0, a1, 30
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    slli a0, a1, 29
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    slli a0, a1, 28
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    slli a0, a1, 27
-; RV32-NEXT:    srli a0, a0, 31
-; RV32-NEXT:    vslide1down.vx v10, v10, a0
-; RV32-NEXT:    srli a1, a1, 5
-; RV32-NEXT:    vslide1down.vx v10, v10, a1
-; RV32-NEXT:    vslidedown.vi v10, v10, 2
-; RV32-NEXT:    vand.vi v10, v10, 1
-; RV32-NEXT:    vmsne.vi v0, v10, 0
-; RV32-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmerge.vim v8, v8, 0, v0
-; RV32-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV32-NEXT:    vse32.v v8, (a2)
-; RV32-NEXT:    ret
-;
-; RV64-LABEL: vselect_vfpzero_v6f32:
-; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    lbu a1, 0(a1)
-; RV64-NEXT:    vle32.v v8, (a0)
-; RV64-NEXT:    andi a0, a1, 1
-; RV64-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT:    vslide1down.vx v10, v8, a0
-; RV64-NEXT:    slli a0, a1, 62
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    slli a0, a1, 61
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    slli a0, a1, 60
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    slli a0, a1, 59
-; RV64-NEXT:    srli a0, a0, 63
-; RV64-NEXT:    vslide1down.vx v10, v10, a0
-; RV64-NEXT:    srli a1, a1, 5
-; RV64-NEXT:    vslide1down.vx v10, v10, a1
-; RV64-NEXT:    vslidedown.vi v10, v10, 2
-; RV64-NEXT:    vand.vi v10, v10, 1
-; RV64-NEXT:    vmsne.vi v0, v10, 0
-; RV64-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
-; RV64-NEXT:    vmerge.vim v8, v8, 0, v0
-; RV64-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
-; RV64-NEXT:    vse32.v v8, (a2)
-; RV64-NEXT:    ret
+; CHECK-LABEL: vselect_vfpzero_v6f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    lbu a1, 0(a1)
+; CHECK-NEXT:    vle32.v v8, (a0)
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vslide1down.vx v10, v8, a1
+; CHECK-NEXT:    srli a0, a1, 1
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a0, a1, 2
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a0, a1, 3
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a0, a1, 4
+; CHECK-NEXT:    vslide1down.vx v10, v10, a0
+; CHECK-NEXT:    srli a1, a1, 5
+; CHECK-NEXT:    vslide1down.vx v10, v10, a1
+; CHECK-NEXT:    vslidedown.vi v10, v10, 2
+; CHECK-NEXT:    vand.vi v10, v10, 1
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmerge.vim v8, v8, 0, v0
+; CHECK-NEXT:    vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT:    vse32.v v8, (a2)
+; CHECK-NEXT:    ret
   %vb = load <6 x float>, ptr %b
   %a = insertelement <6 x float> poison, float 0.0, i32 0
   %va = shufflevector <6 x float> %a, <6 x float> poison, <6 x i32> zeroinitializer
@@ -711,3 +497,6 @@ define <64 x i1> @vselect_v64i1(<64 x i1> %a, <64 x i1> %b, <64 x i1> %cc) {
   %v = select <64 x i1> %cc, <64 x i1> %a, <64 x i1> %b
   ret <64 x i1> %v
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index ee91bae6b6e0271..dcf701be76f62a9 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -725,79 +725,24 @@ define void @test_srem_vec(ptr %X) nounwind {
 ;
 ; RV64MV-LABEL: test_srem_vec:
 ; RV64MV:       # %bb.0:
-; RV64MV-NEXT:    ld a1, 0(a0)
-; RV64MV-NEXT:    lwu a2, 8(a0)
-; RV64MV-NEXT:    srli a3, a1, 2
-; RV64MV-NEXT:    lbu a4, 12(a0)
-; RV64MV-NEXT:    slli a5, a2, 62
-; RV64MV-NEXT:    or a3, a5, a3
-; RV64MV-NEXT:    srai a3, a3, 31
-; RV64MV-NEXT:    slli a4, a4, 32
-; RV64MV-NEXT:    or a2, a2, a4
-; RV64MV-NEXT:    slli a2, a2, 29
-; RV64MV-NEXT:    lui a4, %hi(.LCPI3_0)
-; RV64MV-NEXT:    ld a4, %lo(.LCPI3_0)(a4)
-; RV64MV-NEXT:    srai a2, a2, 31
-; RV64MV-NEXT:    slli a1, a1, 31
-; RV64MV-NEXT:    srai a1, a1, 31
-; RV64MV-NEXT:    mulh a4, a2, a4
-; RV64MV-NEXT:    srli a5, a4, 63
-; RV64MV-NEXT:    srai a4, a4, 1
-; RV64MV-NEXT:    add a4, a4, a5
-; RV64MV-NEXT:    lui a5, %hi(.LCPI3_1)
-; RV64MV-NEXT:    ld a5, %lo(.LCPI3_1)(a5)
-; RV64MV-NEXT:    add a2, a2, a4
-; RV64MV-NEXT:    slli a4, a4, 2
-; RV64MV-NEXT:    add a2, a2, a4
-; RV64MV-NEXT:    mulh a4, a3, a5
-; RV64MV-NEXT:    srli a5, a4, 63
-; RV64MV-NEXT:    srai a4, a4, 1
-; RV64MV-NEXT:    add a4, a4, a5
-; RV64MV-NEXT:    lui a5, %hi(.LCPI3_2)
-; RV64MV-NEXT:    ld a5, %lo(.LCPI3_2)(a5)
-; RV64MV-NEXT:    add a3, a3, a4
-; RV64MV-NEXT:    slli a4, a4, 3
-; RV64MV-NEXT:    sub a3, a3, a4
-; RV64MV-NEXT:    mulh a4, a1, a5
-; RV64MV-NEXT:    srli a5, a4, 63
-; RV64MV-NEXT:    add a4, a4, a5
-; RV64MV-NEXT:    li a5, 6
-; RV64MV-NEXT:    mul a4, a4, a5
-; RV64MV-NEXT:    sub a1, a1, a4
 ; RV64MV-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64MV-NEXT:    vslide1down.vx v8, v8, a1
-; RV64MV-NEXT:    vslide1down.vx v8, v8, a3
-; RV64MV-NEXT:    vslide1down.vx v8, v8, a2
-; RV64MV-NEXT:    vslidedown.vi v8, v8, 1
-; RV64MV-NEXT:    li a1, -1
-; RV64MV-NEXT:    srli a1, a1, 31
-; RV64MV-NEXT:    vand.vx v8, v8, a1
-; RV64MV-NEXT:    lui a2, 32
-; RV64MV-NEXT:    addiw a2, a2, 256
-; RV64MV-NEXT:    vmv.s.x v10, a2
-; RV64MV-NEXT:    vsext.vf8 v12, v10
-; RV64MV-NEXT:    vmsne.vv v0, v8, v12
-; RV64MV-NEXT:    vmv.v.i v8, 0
-; RV64MV-NEXT:    vmerge.vim v8, v8, -1, v0
+; RV64MV-NEXT:    vmv.v.i v8, -1
 ; RV64MV-NEXT:    vsetivli zero, 1, e64, m2, ta, ma
 ; RV64MV-NEXT:    vslidedown.vi v10, v8, 2
-; RV64MV-NEXT:    vmv.x.s a2, v10
-; RV64MV-NEXT:    slli a3, a2, 31
-; RV64MV-NEXT:    srli a3, a3, 61
-; RV64MV-NEXT:    sb a3, 12(a0)
-; RV64MV-NEXT:    vmv.x.s a3, v8
-; RV64MV-NEXT:    and a1, a3, a1
+; RV64MV-NEXT:    vmv.x.s a1, v10
+; RV64MV-NEXT:    slli a2, a1, 31
+; RV64MV-NEXT:    srli a2, a2, 61
+; RV64MV-NEXT:    sb a2, 12(a0)
 ; RV64MV-NEXT:    vsetivli zero, 1, e64, m1, ta, ma
 ; RV64MV-NEXT:    vslidedown.vi v8, v8, 1
-; RV64MV-NEXT:    vmv.x.s a3, v8
-; RV64MV-NEXT:    slli a4, a3, 33
-; RV64MV-NEXT:    or a1, a1, a4
-; RV64MV-NEXT:    sd a1, 0(a0)
-; RV64MV-NEXT:    slli a2, a2, 2
-; RV64MV-NEXT:    slli a3, a3, 31
-; RV64MV-NEXT:    srli a3, a3, 62
-; RV64MV-NEXT:    or a2, a3, a2
-; RV64MV-NEXT:    sw a2, 8(a0)
+; RV64MV-NEXT:    vmv.x.s a2, v8
+; RV64MV-NEXT:    slli a3, a2, 33
+; RV64MV-NEXT:    sd a3, 0(a0)
+; RV64MV-NEXT:    slli a1, a1, 2
+; RV64MV-NEXT:    slli a2, a2, 31
+; RV64MV-NEXT:    srli a2, a2, 62
+; RV64MV-NEXT:    or a1, a2, a1
+; RV64MV-NEXT:    sw a1, 8(a0)
 ; RV64MV-NEXT:    ret
   %ld = load <3 x i33>, ptr %X
   %srem = srem <3 x i33> %ld, <i33 6, i33 7, i33 -5>
diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
index f24b6107f15ada7..456d98fd4e47ffe 100644
--- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll
@@ -521,35 +521,33 @@ define void @test_urem_vec(ptr %X) nounwind {
 ;
 ; RV32MV-LABEL: test_urem_vec:
 ; RV32MV:       # %bb.0:
-; RV32MV-NEXT:    lw a1, 0(a0)
-; RV32MV-NEXT:    andi a2, a1, 2047
+; RV32MV-NEXT:    lbu a1, 4(a0)
+; RV32MV-NEXT:    lw a2, 0(a0)
+; RV32MV-NEXT:    slli a1, a1, 10
+; RV32MV-NEXT:    srli a3, a2, 22
+; RV32MV-NEXT:    or a1, a3, a1
 ; RV32MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
 ; RV32MV-NEXT:    vslide1down.vx v8, v8, a2
-; RV32MV-NEXT:    lbu a2, 4(a0)
-; RV32MV-NEXT:    slli a3, a1, 10
-; RV32MV-NEXT:    srli a3, a3, 21
-; RV32MV-NEXT:    vslide1down.vx v8, v8, a3
-; RV32MV-NEXT:    slli a2, a2, 10
-; RV32MV-NEXT:    srli a1, a1, 22
-; RV32MV-NEXT:    or a1, a1, a2
-; RV32MV-NEXT:    andi a1, a1, 2047
+; RV32MV-NEXT:    srli a2, a2, 11
+; RV32MV-NEXT:    vslide1down.vx v8, v8, a2
 ; RV32MV-NEXT:    vslide1down.vx v8, v8, a1
-; RV32MV-NEXT:    lui a1, %hi(.LCPI4_0)
-; RV32MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV32MV-NEXT:    vle16.v v9, (a1)
 ; RV32MV-NEXT:    vslidedown.vi v8, v8, 1
+; RV32MV-NEXT:    li a1, 2047
+; RV32MV-NEXT:    lui a2, %hi(.LCPI4_0)
+; RV32MV-NEXT:    addi a2, a2, %lo(.LCPI4_0)
+; RV32MV-NEXT:    vle16.v v9, (a2)
+; RV32MV-NEXT:    vand.vx v8, v8, a1
 ; RV32MV-NEXT:    vid.v v10
 ; RV32MV-NEXT:    vsub.vv v8, v8, v10
 ; RV32MV-NEXT:    vmul.vv v8, v8, v9
 ; RV32MV-NEXT:    vadd.vv v9, v8, v8
-; RV32MV-NEXT:    lui a1, 41121
-; RV32MV-NEXT:    addi a1, a1, -1527
+; RV32MV-NEXT:    lui a2, 41121
+; RV32MV-NEXT:    addi a2, a2, -1527
 ; RV32MV-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV32MV-NEXT:    vmv.s.x v10, a1
+; RV32MV-NEXT:    vmv.s.x v10, a2
 ; RV32MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV32MV-NEXT:    vsext.vf2 v11, v10
 ; RV32MV-NEXT:    vsll.vv v9, v9, v11
-; RV32MV-NEXT:    li a1, 2047
 ; RV32MV-NEXT:    vand.vx v8, v8, a1
 ; RV32MV-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV32MV-NEXT:    vmv.v.i v10, 1
@@ -587,30 +585,31 @@ define void @test_urem_vec(ptr %X) nounwind {
 ; RV64MV-NEXT:    lwu a2, 0(a0)
 ; RV64MV-NEXT:    slli a1, a1, 32
 ; RV64MV-NEXT:    or a1, a2, a1
-; RV64MV-NEXT:    andi a2, a1, 2047
 ; RV64MV-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; RV64MV-NEXT:    vslide1down.vx v8, v8, a2
-; RV64MV-NEXT:    slli a2, a1, 42
-; RV64MV-NEXT:    srli a2, a2, 53
+; RV64MV-NEXT:    vslide1down.vx v8, v8, a1
+; RV64MV-NEXT:    slli a1, a1, 24
+; RV64MV-NEXT:    srli a1, a1, 24
+; RV64MV-NEXT:    srli a2, a1, 11
 ; RV64MV-NEXT:    vslide1down.vx v8, v8, a2
 ; RV64MV-NEXT:    srli a1, a1, 22
 ; RV64MV-NEXT:    vslide1down.vx v8, v8, a1
-; RV64MV-NEXT:    lui a1, %hi(.LCPI4_0)
-; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI4_0)
-; RV64MV-NEXT:    vle16.v v9, (a1)
 ; RV64MV-NEXT:    vslidedown.vi v8, v8, 1
+; RV64MV-NEXT:    li a1, 2047
+; RV64MV-NEXT:    lui a2, %hi(.LCPI4_0)
+; RV64MV-NEXT:    addi a2, a2, %lo(.LCPI4_0)
+; RV64MV-NEXT:    vle16.v v9, (a2)
+; RV64MV-NEXT:    vand.vx v8, v8, a1
 ; RV64MV-NEXT:    vid.v v10
 ; RV64MV-NEXT:    vsub.vv v8, v8, v10
 ; RV64MV-NEXT:    vmul.vv v8, v8, v9
 ; RV64MV-NEXT:    vadd.vv v9, v8, v8
-; RV64MV-NEXT:    lui a1, 41121
-; RV64MV-NEXT:    addiw a1, a1, -1527
+; RV64MV-NEXT:    lui a2, 41121
+; RV64MV-NEXT:    addiw a2, a2, -1527
 ; RV64MV-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
-; RV64MV-NEXT:    vmv.s.x v10, a1
+; RV64MV-NEXT:    vmv.s.x v10, a2
 ; RV64MV-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
 ; RV64MV-NEXT:    vsext.vf2 v11, v10
 ; RV64MV-NEXT:    vsll.vv v9, v9, v11
-; RV64MV-NEXT:    li a1, 2047
 ; RV64MV-NEXT:    vand.vx v8, v8, a1
 ; RV64MV-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
 ; RV64MV-NEXT:    vmv.v.i v10, 1

>From b0f82fb335ed4c32b839ef9459a6c43e63c2115b Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail at philipreames.com>
Date: Mon, 25 Sep 2023 15:11:31 -0700
Subject: [PATCH 2/4] Update llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Co-authored-by: Luke Lau <luke_lau at icloud.com>
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c40e940f1b8aeae..c5e46f076225d4b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13338,7 +13338,7 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
 }
 
 /// IF we have a build_vector where each lanes is binop X, C, where C
-/// is a constant (nut not neccessarily the same constant on all lanes),
+/// is a constant (but not necessarily the same constant on all lanes),
 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
 /// We assume that materializing a constant build vector will be no more
 /// expensive that performing O(n) binops.

>From a4b9f8e8c08b5101555376d61e354f091a9488e7 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail at philipreames.com>
Date: Mon, 25 Sep 2023 15:11:42 -0700
Subject: [PATCH 3/4] Update llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Co-authored-by: Luke Lau <luke_lau at icloud.com>
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c5e46f076225d4b..a0f1e33bc66625b 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13337,7 +13337,7 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
   return tryFoldSelectIntoOp(N, DAG, FalseVal, TrueVal, /*Swapped*/true);
 }
 
-/// IF we have a build_vector where each lanes is binop X, C, where C
+/// IF we have a build_vector where each lane is binop X, C, where C
 /// is a constant (but not necessarily the same constant on all lanes),
 /// form binop (build_vector x1, x2, ...), (build_vector c1, c2, c3, ..).
 /// We assume that materializing a constant build vector will be no more

>From 12c98b25d5aaf5c5f6e0c17ffa56b8521b6689cf Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Tue, 26 Sep 2023 07:47:10 -0700
Subject: [PATCH 4/4] clang-format fixes

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a0f1e33bc66625b..c4942f9c637bd8d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -13343,8 +13343,8 @@ static SDValue performSELECTCombine(SDNode *N, SelectionDAG &DAG,
 /// We assume that materializing a constant build vector will be no more
 /// expensive that performing O(n) binops.
 static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
-                                            const RISCVSubtarget &Subtarget,
-                                            const RISCVTargetLowering &TLI) {
+                                          const RISCVSubtarget &Subtarget,
+                                          const RISCVTargetLowering &TLI) {
   SDLoc DL(N);
   EVT VT = N->getValueType(0);
 
@@ -13357,8 +13357,7 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
   if (!TLI.isBinOp(Opcode))
     return SDValue();
 
-  if (!TLI.isOperationLegalOrCustom(Opcode, VT) ||
-      !TLI.isTypeLegal(VT))
+  if (!TLI.isOperationLegalOrCustom(Opcode, VT) || !TLI.isTypeLegal(VT))
     return SDValue();
 
   SmallVector<SDValue> LHSOps;
@@ -13383,8 +13382,7 @@ static SDValue performBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG,
     RHSOps.push_back(Op.getOperand(1));
   }
 
-  return DAG.getNode(Opcode, DL, VT,
-                     DAG.getBuildVector(VT, DL, LHSOps),
+  return DAG.getNode(Opcode, DL, VT, DAG.getBuildVector(VT, DL, LHSOps),
                      DAG.getBuildVector(VT, DL, RHSOps));
 }
 


