[llvm] [RISCV] Lower constant build_vectors with few non-sign bits via ZSEXT (PR #65648)

Philip Reames via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 7 10:36:13 PDT 2023


https://github.com/preames created https://github.com/llvm/llvm-project/pull/65648:

If we have a build_vector such as [i64 0, i64 3, i64 1, i64 2], we can instead lower it as vsext([i8 0, i8 3, i8 1, i8 2]).  For vectors with 4 or fewer elements, the resulting narrow vector can be materialized cheaply from a scalar.

For shuffles which get lowered to vrgather, constant build_vectors with small element values are idiomatic.  As such, this change covers all shuffles whose output type has 4 or fewer elements.

I deliberately started narrow here.  I think it makes sense to extend this to longer vectors, but we need a more robust profitability model for the recursive expansion.  It's questionable whether we want to do the sext if we're going to generate a constant pool load for the narrower type anyway.

One possibility for future exploration is to allow the narrower VT to be less than 8 bits.  We can't use vsext for that, but we could use something analogous to our widening interleave lowering with some extra shifts and ands.
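
As a concrete example, the store_constant_v4i32 diff below shows the new RV64 codegen.  The constant <4 x i32> <i32 3, i32 6, i32 5, i32 1> fits in i8 elements, so its bytes pack into the 32-bit immediate 0x01050603 (lui 4176 / addiw 1539); that scalar is moved into a vector register and sign extended back to e32 with vsext.vf4, replacing the old constant pool load:

  lui a1, 4176
  addiw a1, a1, 1539
  vsetivli zero, 4, e32, m1, ta, ma
  vmv.s.x v8, a1
  vsext.vf4 v9, v8
  vse32.v v9, (a0)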

>From 52e8d5a6f2d958f0fc36e3b08d7f2a848c869c23 Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 7 Sep 2023 09:49:44 -0700
Subject: [PATCH] [RISCV] Lower constant build_vectors with few non-sign bits
 via ZSEXT

If we have a build_vector such as [i64 0, i64 3, i64 1, i64 2], we can instead lower it as vsext([i8 0, i8 3, i8 1, i8 2]).  For vectors with 4 or fewer elements, the resulting narrow vector can be materialized cheaply from a scalar.

For shuffles which get lowered to vrgather, constant build_vectors with small element values are idiomatic.  As such, this change covers all shuffles whose output type has 4 or fewer elements.

I deliberately started narrow here.  I think it makes sense to extend this to longer vectors, but we need a more robust profitability model for the recursive expansion.  It's questionable whether we want to do the sext if we're going to generate a constant pool load for the narrower type anyway.

One possibility for future exploration is to allow the narrower VT to be less than 8 bits.  We can't use vsext for that, but we could use something analogous to our widening interleave lowering with some extra shifts and ands.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |  21 +++
 .../RISCV/rvv/fixed-vectors-extract.ll        |  26 ++--
 .../RISCV/rvv/fixed-vectors-fp-interleave.ll  |  36 +++--
 .../RISCV/rvv/fixed-vectors-fp-shuffles.ll    |  36 +++--
 .../RISCV/rvv/fixed-vectors-int-interleave.ll |  36 +++--
 .../RISCV/rvv/fixed-vectors-int-shuffles.ll   |  64 ++++++---
 .../CodeGen/RISCV/rvv/fixed-vectors-int.ll    | 134 +++++++++++-------
 .../RISCV/rvv/fixed-vectors-stepvector.ll     |  77 +++++-----
 .../CodeGen/RISCV/rvv/fixed-vectors-store.ll  |  56 +++++---
 .../RISCV/rvv/vector-interleave-fixed.ll      |  40 +++---
 .../CodeGen/RISCV/srem-seteq-illegal-types.ll |   9 +-
 11 files changed, 343 insertions(+), 192 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 05e656ac817027c..2e7bf1223468ac1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3456,6 +3456,27 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
   if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget))
     return Res;
 
+  // If the number of sign bits allows, see if we can lower as a <N x i8>.
+  // We restrict this to N <= 4 to ensure the resulting narrow vector is
+  // 32 bits or smaller and can thus be materialized cheaply from a scalar.
+  // The main motivation for this is the constant index vector required
+  // by vrgather.vv.  This covers all index vectors up to size 4.
+  // TODO: We really should be costing the smaller vector.  There are
+  // profitable cases this misses.
+  const unsigned ScalarSize =
+    Op.getSimpleValueType().getScalarSizeInBits();
+  if (ScalarSize > 8 && NumElts <= 4) {
+    unsigned SignBits = DAG.ComputeNumSignBits(Op);
+    if (ScalarSize - SignBits < 8) {
+      SDValue Source =
+        DAG.getNode(ISD::TRUNCATE, DL, VT.changeVectorElementType(MVT::i8), Op);
+      Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8),
+                                       Source, DAG, Subtarget);
+      SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL);
+      return convertFromScalableVector(VT, Res, DAG, Subtarget);
+    }
+  }
+
   // For constant vectors, use generic constant pool lowering.  Otherwise,
   // we'd have to materialize constants in GPRs just to move them into the
   // vector.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 8ea9c15e86208e1..decd6ae09975402 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -806,18 +806,19 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ; RV32NOM:       # %bb.0:
 ; RV32NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32NOM-NEXT:    vmv.v.i v9, 0
-; RV32NOM-NEXT:    li a0, -1
-; RV32NOM-NEXT:    vslide1down.vx v9, v9, a0
 ; RV32NOM-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV32NOM-NEXT:    addi a0, a0, %lo(.LCPI42_0)
 ; RV32NOM-NEXT:    vle32.v v10, (a0)
-; RV32NOM-NEXT:    lui a0, %hi(.LCPI42_1)
-; RV32NOM-NEXT:    addi a0, a0, %lo(.LCPI42_1)
-; RV32NOM-NEXT:    vle32.v v11, (a0)
+; RV32NOM-NEXT:    li a0, -1
+; RV32NOM-NEXT:    vslide1down.vx v9, v9, a0
 ; RV32NOM-NEXT:    vand.vv v9, v8, v9
 ; RV32NOM-NEXT:    vmulh.vv v8, v8, v10
 ; RV32NOM-NEXT:    vadd.vv v8, v8, v9
-; RV32NOM-NEXT:    vsra.vv v9, v8, v11
+; RV32NOM-NEXT:    lui a0, 12320
+; RV32NOM-NEXT:    addi a0, a0, 257
+; RV32NOM-NEXT:    vmv.s.x v9, a0
+; RV32NOM-NEXT:    vsext.vf4 v10, v9
+; RV32NOM-NEXT:    vsra.vv v9, v8, v10
 ; RV32NOM-NEXT:    vsrl.vi v8, v8, 31
 ; RV32NOM-NEXT:    vadd.vv v8, v9, v8
 ; RV32NOM-NEXT:    vslidedown.vi v8, v8, 2
@@ -841,18 +842,19 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) {
 ; RV64NOM:       # %bb.0:
 ; RV64NOM-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64NOM-NEXT:    vmv.v.i v9, 0
-; RV64NOM-NEXT:    li a0, -1
-; RV64NOM-NEXT:    vslide1down.vx v9, v9, a0
 ; RV64NOM-NEXT:    lui a0, %hi(.LCPI42_0)
 ; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI42_0)
 ; RV64NOM-NEXT:    vle32.v v10, (a0)
-; RV64NOM-NEXT:    lui a0, %hi(.LCPI42_1)
-; RV64NOM-NEXT:    addi a0, a0, %lo(.LCPI42_1)
-; RV64NOM-NEXT:    vle32.v v11, (a0)
+; RV64NOM-NEXT:    li a0, -1
+; RV64NOM-NEXT:    vslide1down.vx v9, v9, a0
 ; RV64NOM-NEXT:    vand.vv v9, v8, v9
 ; RV64NOM-NEXT:    vmulh.vv v8, v8, v10
 ; RV64NOM-NEXT:    vadd.vv v8, v8, v9
-; RV64NOM-NEXT:    vsra.vv v8, v8, v11
+; RV64NOM-NEXT:    lui a0, 12320
+; RV64NOM-NEXT:    addiw a0, a0, 257
+; RV64NOM-NEXT:    vmv.s.x v9, a0
+; RV64NOM-NEXT:    vsext.vf4 v10, v9
+; RV64NOM-NEXT:    vsra.vv v8, v8, v10
 ; RV64NOM-NEXT:    vsrl.vi v9, v8, 31
 ; RV64NOM-NEXT:    vadd.vv v8, v8, v9
 ; RV64NOM-NEXT:    vslidedown.vi v8, v8, 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index cc6abfd8f2645f7..6fa9cddde622ce5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -435,40 +435,48 @@ define <4 x float> @unary_interleave_v4f32(<4 x float> %x) {
 define <4 x double> @unary_interleave_v4f64(<4 x double> %x) {
 ; RV32-V128-LABEL: unary_interleave_v4f64:
 ; RV32-V128:       # %bb.0:
-; RV32-V128-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI13_0)
-; RV32-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-V128-NEXT:    vle16.v v12, (a0)
+; RV32-V128-NEXT:    lui a0, 12304
+; RV32-V128-NEXT:    addi a0, a0, 512
+; RV32-V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-V128-NEXT:    vmv.s.x v10, a0
+; RV32-V128-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-V128-NEXT:    vsext.vf2 v12, v10
+; RV32-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-V128-NEXT:    vrgatherei16.vv v10, v8, v12
 ; RV32-V128-NEXT:    vmv.v.v v8, v10
 ; RV32-V128-NEXT:    ret
 ;
 ; RV64-V128-LABEL: unary_interleave_v4f64:
 ; RV64-V128:       # %bb.0:
-; RV64-V128-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI13_0)
+; RV64-V128-NEXT:    lui a0, 12304
+; RV64-V128-NEXT:    addiw a0, a0, 512
 ; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-V128-NEXT:    vle64.v v12, (a0)
+; RV64-V128-NEXT:    vmv.s.x v10, a0
+; RV64-V128-NEXT:    vsext.vf8 v12, v10
 ; RV64-V128-NEXT:    vrgather.vv v10, v8, v12
 ; RV64-V128-NEXT:    vmv.v.v v8, v10
 ; RV64-V128-NEXT:    ret
 ;
 ; RV32-V512-LABEL: unary_interleave_v4f64:
 ; RV32-V512:       # %bb.0:
-; RV32-V512-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV32-V512-NEXT:    addi a0, a0, %lo(.LCPI13_0)
-; RV32-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
-; RV32-V512-NEXT:    vle16.v v10, (a0)
+; RV32-V512-NEXT:    lui a0, 12304
+; RV32-V512-NEXT:    addi a0, a0, 512
+; RV32-V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; RV32-V512-NEXT:    vmv.s.x v9, a0
+; RV32-V512-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-V512-NEXT:    vsext.vf2 v10, v9
+; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; RV32-V512-NEXT:    vrgatherei16.vv v9, v8, v10
 ; RV32-V512-NEXT:    vmv.v.v v8, v9
 ; RV32-V512-NEXT:    ret
 ;
 ; RV64-V512-LABEL: unary_interleave_v4f64:
 ; RV64-V512:       # %bb.0:
-; RV64-V512-NEXT:    lui a0, %hi(.LCPI13_0)
-; RV64-V512-NEXT:    addi a0, a0, %lo(.LCPI13_0)
+; RV64-V512-NEXT:    lui a0, 12304
+; RV64-V512-NEXT:    addiw a0, a0, 512
 ; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
-; RV64-V512-NEXT:    vle64.v v10, (a0)
+; RV64-V512-NEXT:    vmv.s.x v9, a0
+; RV64-V512-NEXT:    vsext.vf8 v10, v9
 ; RV64-V512-NEXT:    vrgather.vv v9, v8, v10
 ; RV64-V512-NEXT:    vmv.v.v v8, v9
 ; RV64-V512-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index 9fe1eb7f7ed2bc8..8d66248a1e57df4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -57,20 +57,24 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) {
 define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
 ; RV32-LABEL: vrgather_permute_shuffle_vu_v4f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI4_0)
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vle16.v v12, (a0)
+; RV32-NEXT:    lui a0, 4096
+; RV32-NEXT:    addi a0, a0, 513
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v10, a0
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vsext.vf2 v12, v10
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vrgather_permute_shuffle_vu_v4f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI4_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI4_0)
+; RV64-NEXT:    lui a0, 4096
+; RV64-NEXT:    addiw a0, a0, 513
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vle64.v v12, (a0)
+; RV64-NEXT:    vmv.s.x v10, a0
+; RV64-NEXT:    vsext.vf8 v12, v10
 ; RV64-NEXT:    vrgather.vv v10, v8, v12
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
@@ -81,20 +85,24 @@ define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) {
 define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) {
 ; RV32-LABEL: vrgather_permute_shuffle_uv_v4f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    lui a0, %hi(.LCPI5_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI5_0)
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vle16.v v12, (a0)
+; RV32-NEXT:    lui a0, 4096
+; RV32-NEXT:    addi a0, a0, 513
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v10, a0
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vsext.vf2 v12, v10
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vrgather_permute_shuffle_uv_v4f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    lui a0, %hi(.LCPI5_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI5_0)
+; RV64-NEXT:    lui a0, 4096
+; RV64-NEXT:    addiw a0, a0, 513
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    vle64.v v12, (a0)
+; RV64-NEXT:    vmv.s.x v10, a0
+; RV64-NEXT:    vsext.vf8 v12, v10
 ; RV64-NEXT:    vrgather.vv v10, v8, v12
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index 790ebe82a1e4c8c..f9a64498afacc10 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -668,40 +668,48 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) {
 define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
 ; RV32-V128-LABEL: unary_interleave_v4i64:
 ; RV32-V128:       # %bb.0:
-; RV32-V128-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV32-V128-NEXT:    addi a0, a0, %lo(.LCPI22_0)
-; RV32-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-V128-NEXT:    vle16.v v12, (a0)
+; RV32-V128-NEXT:    lui a0, 12304
+; RV32-V128-NEXT:    addi a0, a0, 512
+; RV32-V128-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-V128-NEXT:    vmv.s.x v10, a0
+; RV32-V128-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-V128-NEXT:    vsext.vf2 v12, v10
+; RV32-V128-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-V128-NEXT:    vrgatherei16.vv v10, v8, v12
 ; RV32-V128-NEXT:    vmv.v.v v8, v10
 ; RV32-V128-NEXT:    ret
 ;
 ; RV64-V128-LABEL: unary_interleave_v4i64:
 ; RV64-V128:       # %bb.0:
-; RV64-V128-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV64-V128-NEXT:    addi a0, a0, %lo(.LCPI22_0)
+; RV64-V128-NEXT:    lui a0, 12304
+; RV64-V128-NEXT:    addiw a0, a0, 512
 ; RV64-V128-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-V128-NEXT:    vle64.v v12, (a0)
+; RV64-V128-NEXT:    vmv.s.x v10, a0
+; RV64-V128-NEXT:    vsext.vf8 v12, v10
 ; RV64-V128-NEXT:    vrgather.vv v10, v8, v12
 ; RV64-V128-NEXT:    vmv.v.v v8, v10
 ; RV64-V128-NEXT:    ret
 ;
 ; RV32-V512-LABEL: unary_interleave_v4i64:
 ; RV32-V512:       # %bb.0:
-; RV32-V512-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV32-V512-NEXT:    addi a0, a0, %lo(.LCPI22_0)
-; RV32-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
-; RV32-V512-NEXT:    vle16.v v10, (a0)
+; RV32-V512-NEXT:    lui a0, 12304
+; RV32-V512-NEXT:    addi a0, a0, 512
+; RV32-V512-NEXT:    vsetivli zero, 4, e32, mf2, ta, ma
+; RV32-V512-NEXT:    vmv.s.x v9, a0
+; RV32-V512-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; RV32-V512-NEXT:    vsext.vf2 v10, v9
+; RV32-V512-NEXT:    vsetvli zero, zero, e64, m1, ta, ma
 ; RV32-V512-NEXT:    vrgatherei16.vv v9, v8, v10
 ; RV32-V512-NEXT:    vmv.v.v v8, v9
 ; RV32-V512-NEXT:    ret
 ;
 ; RV64-V512-LABEL: unary_interleave_v4i64:
 ; RV64-V512:       # %bb.0:
-; RV64-V512-NEXT:    lui a0, %hi(.LCPI22_0)
-; RV64-V512-NEXT:    addi a0, a0, %lo(.LCPI22_0)
+; RV64-V512-NEXT:    lui a0, 12304
+; RV64-V512-NEXT:    addiw a0, a0, 512
 ; RV64-V512-NEXT:    vsetivli zero, 4, e64, m1, ta, ma
-; RV64-V512-NEXT:    vle64.v v10, (a0)
+; RV64-V512-NEXT:    vmv.s.x v9, a0
+; RV64-V512-NEXT:    vsext.vf8 v10, v9
 ; RV64-V512-NEXT:    vrgather.vv v9, v8, v10
 ; RV64-V512-NEXT:    vmv.v.v v8, v9
 ; RV64-V512-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 3f2b2d3c2a81848..b4f3a0fb197a53b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -51,29 +51,57 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) {
 }
 
 define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) {
-; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI4_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI4_0)
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vrgather.vv v9, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
+; RV32-LABEL: vrgather_permute_shuffle_vu_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 4096
+; RV32-NEXT:    addi a0, a0, 513
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v9, a0
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vsext.vf2 v10, v9
+; RV32-NEXT:    vrgather.vv v9, v8, v10
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vrgather_permute_shuffle_vu_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, 4096
+; RV64-NEXT:    addiw a0, a0, 513
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v9, a0
+; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV64-NEXT:    vsext.vf2 v10, v9
+; RV64-NEXT:    vrgather.vv v9, v8, v10
+; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    ret
   %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 0, i32 1>
   ret <4 x i16> %s
 }
 
 define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) {
-; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, %hi(.LCPI5_0)
-; CHECK-NEXT:    addi a0, a0, %lo(.LCPI5_0)
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a0)
-; CHECK-NEXT:    vrgather.vv v9, v8, v10
-; CHECK-NEXT:    vmv1r.v v8, v9
-; CHECK-NEXT:    ret
+; RV32-LABEL: vrgather_permute_shuffle_uv_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a0, 4096
+; RV32-NEXT:    addi a0, a0, 513
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v9, a0
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vsext.vf2 v10, v9
+; RV32-NEXT:    vrgather.vv v9, v8, v10
+; RV32-NEXT:    vmv1r.v v8, v9
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: vrgather_permute_shuffle_uv_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a0, 4096
+; RV64-NEXT:    addiw a0, a0, 513
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v9, a0
+; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV64-NEXT:    vsext.vf2 v10, v9
+; RV64-NEXT:    vrgather.vv v9, v8, v10
+; RV64-NEXT:    vmv1r.v v8, v9
+; RV64-NEXT:    ret
   %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> <i32 5, i32 6, i32 4, i32 5>
   ret <4 x i16> %s
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 68309001f445b32..dbf7dfbcab49cb1 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -1238,27 +1238,53 @@ define void @mulhu_v8i16(ptr %x) {
 }
 
 define void @mulhu_v6i16(ptr %x) {
-; CHECK-LABEL: mulhu_v6i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    vadd.vi v9, v9, 12
-; CHECK-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v8, 4
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vdivu.vv v9, v10, v9
-; CHECK-NEXT:    lui a1, %hi(.LCPI67_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI67_0)
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vle16.v v10, (a1)
-; CHECK-NEXT:    vdivu.vv v8, v8, v10
-; CHECK-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
-; CHECK-NEXT:    vslideup.vi v8, v9, 4
-; CHECK-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
-; CHECK-NEXT:    vse16.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: mulhu_v6i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; RV32-NEXT:    vle16.v v8, (a0)
+; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT:    vid.v v9
+; RV32-NEXT:    vadd.vi v9, v9, 12
+; RV32-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; RV32-NEXT:    vslidedown.vi v10, v8, 4
+; RV32-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; RV32-NEXT:    vdivu.vv v9, v10, v9
+; RV32-NEXT:    lui a1, 45217
+; RV32-NEXT:    addi a1, a1, -1785
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v10, a1
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vsext.vf2 v11, v10
+; RV32-NEXT:    vdivu.vv v8, v8, v11
+; RV32-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT:    vslideup.vi v8, v9, 4
+; RV32-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; RV32-NEXT:    vse16.v v8, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: mulhu_v6i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; RV64-NEXT:    vle16.v v8, (a0)
+; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT:    vid.v v9
+; RV64-NEXT:    vadd.vi v9, v9, 12
+; RV64-NEXT:    vsetivli zero, 2, e16, m1, ta, ma
+; RV64-NEXT:    vslidedown.vi v10, v8, 4
+; RV64-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
+; RV64-NEXT:    vdivu.vv v9, v10, v9
+; RV64-NEXT:    lui a1, 45217
+; RV64-NEXT:    addiw a1, a1, -1785
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v10, a1
+; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV64-NEXT:    vsext.vf2 v11, v10
+; RV64-NEXT:    vdivu.vv v8, v8, v11
+; RV64-NEXT:    vsetivli zero, 8, e16, m1, ta, ma
+; RV64-NEXT:    vslideup.vi v8, v9, 4
+; RV64-NEXT:    vsetivli zero, 6, e16, m1, ta, ma
+; RV64-NEXT:    vse16.v v8, (a0)
+; RV64-NEXT:    ret
   %a = load <6 x i16>, ptr %x
   %b = udiv <6 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13>
   store <6 x i16> %b, ptr %x
@@ -1306,12 +1332,13 @@ define void @mulhu_v2i64(ptr %x) {
 ; RV32-NEXT:    vle32.v v9, (a1)
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vmulhu.vv v8, v8, v9
-; RV32-NEXT:    lui a1, %hi(.LCPI69_1)
-; RV32-NEXT:    addi a1, a1, %lo(.LCPI69_1)
+; RV32-NEXT:    lui a1, 32
+; RV32-NEXT:    addi a1, a1, 1
+; RV32-NEXT:    vmv.s.x v9, a1
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vle32.v v9, (a1)
+; RV32-NEXT:    vsext.vf4 v10, v9
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vv v8, v8, v9
+; RV32-NEXT:    vsrl.vv v8, v8, v10
 ; RV32-NEXT:    vse64.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
@@ -5214,16 +5241,17 @@ define void @mulhu_v8i32(ptr %x) {
 ; LMULMAX1-RV64-LABEL: mulhu_v8i32:
 ; LMULMAX1-RV64:       # %bb.0:
 ; LMULMAX1-RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; LMULMAX1-RV64-NEXT:    vle32.v v8, (a0)
 ; LMULMAX1-RV64-NEXT:    addi a1, a0, 16
-; LMULMAX1-RV64-NEXT:    vle32.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    lui a2, %hi(.LCPI183_0)
-; LMULMAX1-RV64-NEXT:    addi a2, a2, %lo(.LCPI183_0)
-; LMULMAX1-RV64-NEXT:    vle32.v v9, (a2)
-; LMULMAX1-RV64-NEXT:    vle32.v v10, (a0)
-; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v10, v9
-; LMULMAX1-RV64-NEXT:    vse32.v v9, (a0)
-; LMULMAX1-RV64-NEXT:    vse32.v v8, (a1)
+; LMULMAX1-RV64-NEXT:    vle32.v v9, (a1)
+; LMULMAX1-RV64-NEXT:    lui a2, 36976
+; LMULMAX1-RV64-NEXT:    addiw a2, a2, 1541
+; LMULMAX1-RV64-NEXT:    vmv.s.x v10, a2
+; LMULMAX1-RV64-NEXT:    vsext.vf4 v11, v10
+; LMULMAX1-RV64-NEXT:    vdivu.vv v9, v9, v11
+; LMULMAX1-RV64-NEXT:    vdivu.vv v8, v8, v11
+; LMULMAX1-RV64-NEXT:    vse32.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    vse32.v v9, (a1)
 ; LMULMAX1-RV64-NEXT:    ret
   %a = load <8 x i32>, ptr %x
   %b = udiv <8 x i32> %a, <i32 5, i32 6, i32 7, i32 9, i32 5, i32 6, i32 7, i32 9>
@@ -5276,13 +5304,14 @@ define void @mulhu_v4i64(ptr %x) {
 ; LMULMAX2-RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; LMULMAX2-RV64-NEXT:    vle64.v v10, (a1)
 ; LMULMAX2-RV64-NEXT:    vmulhu.vv v10, v8, v10
-; LMULMAX2-RV64-NEXT:    lui a1, %hi(.LCPI184_1)
-; LMULMAX2-RV64-NEXT:    addi a1, a1, %lo(.LCPI184_1)
-; LMULMAX2-RV64-NEXT:    vle64.v v14, (a1)
 ; LMULMAX2-RV64-NEXT:    vsub.vv v8, v8, v10
 ; LMULMAX2-RV64-NEXT:    vmulhu.vv v8, v8, v12
 ; LMULMAX2-RV64-NEXT:    vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v14
+; LMULMAX2-RV64-NEXT:    lui a1, 12320
+; LMULMAX2-RV64-NEXT:    addiw a1, a1, 513
+; LMULMAX2-RV64-NEXT:    vmv.s.x v10, a1
+; LMULMAX2-RV64-NEXT:    vsext.vf8 v12, v10
+; LMULMAX2-RV64-NEXT:    vsrl.vv v8, v8, v12
 ; LMULMAX2-RV64-NEXT:    vse64.v v8, (a0)
 ; LMULMAX2-RV64-NEXT:    ret
 ;
@@ -5292,18 +5321,20 @@ define void @mulhu_v4i64(ptr %x) {
 ; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    lui a2, %hi(.LCPI184_0)
-; LMULMAX1-RV32-NEXT:    addi a2, a2, %lo(.LCPI184_0)
+; LMULMAX1-RV32-NEXT:    lui a2, 144
+; LMULMAX1-RV32-NEXT:    addi a2, a2, 7
+; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a2)
+; LMULMAX1-RV32-NEXT:    vsext.vf4 v11, v10
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    lui a2, %hi(.LCPI184_1)
-; LMULMAX1-RV32-NEXT:    addi a2, a2, %lo(.LCPI184_1)
+; LMULMAX1-RV32-NEXT:    vdivu.vv v9, v9, v11
+; LMULMAX1-RV32-NEXT:    lui a2, 80
+; LMULMAX1-RV32-NEXT:    addi a2, a2, 3
+; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a2)
+; LMULMAX1-RV32-NEXT:    vsext.vf4 v11, v10
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v10
+; LMULMAX1-RV32-NEXT:    vdivu.vv v8, v8, v11
 ; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
 ; LMULMAX1-RV32-NEXT:    ret
@@ -5660,13 +5691,14 @@ define void @mulhs_v4i64(ptr %x) {
 ; LMULMAX1-RV32-NEXT:    vle64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    addi a1, a0, 16
 ; LMULMAX1-RV32-NEXT:    vle64.v v9, (a1)
-; LMULMAX1-RV32-NEXT:    lui a2, %hi(.LCPI188_0)
-; LMULMAX1-RV32-NEXT:    addi a2, a2, %lo(.LCPI188_0)
+; LMULMAX1-RV32-NEXT:    lui a2, 1048528
+; LMULMAX1-RV32-NEXT:    addi a2, a2, 3
+; LMULMAX1-RV32-NEXT:    vmv.s.x v10, a2
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vle32.v v10, (a2)
+; LMULMAX1-RV32-NEXT:    vsext.vf4 v11, v10
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v10
-; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v10
+; LMULMAX1-RV32-NEXT:    vdiv.vv v9, v9, v11
+; LMULMAX1-RV32-NEXT:    vdiv.vv v8, v8, v11
 ; LMULMAX1-RV32-NEXT:    vse64.v v8, (a0)
 ; LMULMAX1-RV32-NEXT:    vse64.v v9, (a1)
 ; LMULMAX1-RV32-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
index f9c49be55986ce3..13e81d30d66a337 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll
@@ -230,10 +230,11 @@ define <4 x i64> @stepvector_v4i64() {
 ; RV32LMULMAX1-NEXT:    vmv.v.i v8, 0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI14_0)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI14_0)
+; RV32LMULMAX1-NEXT:    lui a0, 48
+; RV32LMULMAX1-NEXT:    addi a0, a0, 2
+; RV32LMULMAX1-NEXT:    vmv.s.x v10, a0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vle32.v v9, (a0)
+; RV32LMULMAX1-NEXT:    vsext.vf4 v9, v10
 ; RV32LMULMAX1-NEXT:    ret
 ;
 ; RV64LMULMAX1-LABEL: stepvector_v4i64:
@@ -270,16 +271,19 @@ define <8 x i64> @stepvector_v8i64() {
 ; RV32LMULMAX1-NEXT:    vmv.v.i v8, 0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI15_0)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI15_0)
+; RV32LMULMAX1-NEXT:    lui a0, 48
+; RV32LMULMAX1-NEXT:    addi a0, a0, 2
+; RV32LMULMAX1-NEXT:    vmv.s.x v10, a0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vle32.v v9, (a0)
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI15_1)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI15_1)
-; RV32LMULMAX1-NEXT:    vle32.v v10, (a0)
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI15_2)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI15_2)
-; RV32LMULMAX1-NEXT:    vle32.v v11, (a0)
+; RV32LMULMAX1-NEXT:    vsext.vf4 v9, v10
+; RV32LMULMAX1-NEXT:    lui a0, 80
+; RV32LMULMAX1-NEXT:    addi a0, a0, 4
+; RV32LMULMAX1-NEXT:    vmv.s.x v11, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v10, v11
+; RV32LMULMAX1-NEXT:    lui a0, 112
+; RV32LMULMAX1-NEXT:    addi a0, a0, 6
+; RV32LMULMAX1-NEXT:    vmv.s.x v12, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v11, v12
 ; RV32LMULMAX1-NEXT:    ret
 ;
 ; RV64LMULMAX1-LABEL: stepvector_v8i64:
@@ -322,28 +326,35 @@ define <16 x i64> @stepvector_v16i64() {
 ; RV32LMULMAX1-NEXT:    vmv.v.i v8, 0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 3, e32, m1, tu, ma
 ; RV32LMULMAX1-NEXT:    vslideup.vi v8, v9, 2
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI16_0)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI16_0)
+; RV32LMULMAX1-NEXT:    lui a0, 48
+; RV32LMULMAX1-NEXT:    addi a0, a0, 2
+; RV32LMULMAX1-NEXT:    vmv.s.x v10, a0
 ; RV32LMULMAX1-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32LMULMAX1-NEXT:    vle32.v v9, (a0)
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI16_1)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI16_1)
-; RV32LMULMAX1-NEXT:    vle32.v v10, (a0)
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI16_2)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI16_2)
-; RV32LMULMAX1-NEXT:    vle32.v v11, (a0)
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI16_3)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI16_3)
-; RV32LMULMAX1-NEXT:    vle32.v v12, (a0)
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI16_4)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI16_4)
-; RV32LMULMAX1-NEXT:    vle32.v v13, (a0)
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI16_5)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI16_5)
-; RV32LMULMAX1-NEXT:    vle32.v v14, (a0)
-; RV32LMULMAX1-NEXT:    lui a0, %hi(.LCPI16_6)
-; RV32LMULMAX1-NEXT:    addi a0, a0, %lo(.LCPI16_6)
-; RV32LMULMAX1-NEXT:    vle32.v v15, (a0)
+; RV32LMULMAX1-NEXT:    vsext.vf4 v9, v10
+; RV32LMULMAX1-NEXT:    lui a0, 80
+; RV32LMULMAX1-NEXT:    addi a0, a0, 4
+; RV32LMULMAX1-NEXT:    vmv.s.x v11, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v10, v11
+; RV32LMULMAX1-NEXT:    lui a0, 112
+; RV32LMULMAX1-NEXT:    addi a0, a0, 6
+; RV32LMULMAX1-NEXT:    vmv.s.x v12, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v11, v12
+; RV32LMULMAX1-NEXT:    lui a0, 144
+; RV32LMULMAX1-NEXT:    addi a0, a0, 8
+; RV32LMULMAX1-NEXT:    vmv.s.x v13, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v12, v13
+; RV32LMULMAX1-NEXT:    lui a0, 176
+; RV32LMULMAX1-NEXT:    addi a0, a0, 10
+; RV32LMULMAX1-NEXT:    vmv.s.x v14, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v13, v14
+; RV32LMULMAX1-NEXT:    lui a0, 208
+; RV32LMULMAX1-NEXT:    addi a0, a0, 12
+; RV32LMULMAX1-NEXT:    vmv.s.x v15, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v14, v15
+; RV32LMULMAX1-NEXT:    lui a0, 240
+; RV32LMULMAX1-NEXT:    addi a0, a0, 14
+; RV32LMULMAX1-NEXT:    vmv.s.x v16, a0
+; RV32LMULMAX1-NEXT:    vsext.vf4 v15, v16
 ; RV32LMULMAX1-NEXT:    ret
 ;
 ; RV64LMULMAX1-LABEL: stepvector_v16i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index e6430f63f5ada4c..fc0564827036763 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -168,27 +168,51 @@ define void @store_constant_v4i8(ptr %p) {
 }
 
 define void @store_constant_v4i16(ptr %p) {
-; CHECK-LABEL: store_constant_v4i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI13_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI13_0)
-; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT:    vle16.v v8, (a1)
-; CHECK-NEXT:    vse16.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v4i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 4176
+; RV32-NEXT:    addi a1, a1, 1539
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v8, a1
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vsext.vf2 v9, v8
+; RV32-NEXT:    vse16.v v9, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v4i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 4176
+; RV64-NEXT:    addiw a1, a1, 1539
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v8, a1
+; RV64-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV64-NEXT:    vsext.vf2 v9, v8
+; RV64-NEXT:    vse16.v v9, (a0)
+; RV64-NEXT:    ret
   store <4 x i16> <i16 3, i16 6, i16 5, i16 1>, ptr %p
   ret void
 }
 
 define void @store_constant_v4i32(ptr %p) {
-; CHECK-LABEL: store_constant_v4i32:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI14_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI14_0)
-; CHECK-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a1)
-; CHECK-NEXT:    vse32.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v4i32:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 4176
+; RV32-NEXT:    addi a1, a1, 1539
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT:    vmv.s.x v8, a1
+; RV32-NEXT:    vsext.vf4 v9, v8
+; RV32-NEXT:    vse32.v v9, (a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v4i32:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 4176
+; RV64-NEXT:    addiw a1, a1, 1539
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT:    vmv.s.x v8, a1
+; RV64-NEXT:    vsext.vf4 v9, v8
+; RV64-NEXT:    vse32.v v9, (a0)
+; RV64-NEXT:    ret
   store <4 x i32> <i32 3, i32 6, i32 5, i32 1>, ptr %p
   ret void
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
index 02927c60dc4dc03..d113c9fd31bbff9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll
@@ -55,24 +55,28 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) {
 define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
 ; RV32-LABEL: vector_interleave_v4i64_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI3_0)
-; RV32-NEXT:    vle16.v v12, (a0)
 ; RV32-NEXT:    vmv1r.v v10, v9
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vslideup.vi v8, v10, 2
+; RV32-NEXT:    lui a0, 12304
+; RV32-NEXT:    addi a0, a0, 512
+; RV32-NEXT:    vmv.s.x v10, a0
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vsext.vf2 v12, v10
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vector_interleave_v4i64_v2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    lui a0, %hi(.LCPI3_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI3_0)
-; RV64-NEXT:    vle64.v v12, (a0)
 ; RV64-NEXT:    vmv1r.v v10, v9
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vslideup.vi v8, v10, 2
+; RV64-NEXT:    lui a0, 12304
+; RV64-NEXT:    addiw a0, a0, 512
+; RV64-NEXT:    vmv.s.x v10, a0
+; RV64-NEXT:    vsext.vf8 v12, v10
 ; RV64-NEXT:    vrgather.vv v10, v8, v12
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
@@ -155,24 +159,28 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b
 define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) {
 ; RV32-LABEL: vector_interleave_v4f64_v2f64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    lui a0, %hi(.LCPI9_0)
-; RV32-NEXT:    addi a0, a0, %lo(.LCPI9_0)
-; RV32-NEXT:    vle16.v v12, (a0)
 ; RV32-NEXT:    vmv1r.v v10, v9
+; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vslideup.vi v8, v10, 2
+; RV32-NEXT:    lui a0, 12304
+; RV32-NEXT:    addi a0, a0, 512
+; RV32-NEXT:    vmv.s.x v10, a0
+; RV32-NEXT:    vsetvli zero, zero, e16, mf2, ta, ma
+; RV32-NEXT:    vsext.vf2 v12, v10
+; RV32-NEXT:    vsetvli zero, zero, e64, m2, ta, ma
 ; RV32-NEXT:    vrgatherei16.vv v10, v8, v12
 ; RV32-NEXT:    vmv.v.v v8, v10
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vector_interleave_v4f64_v2f64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT:    lui a0, %hi(.LCPI9_0)
-; RV64-NEXT:    addi a0, a0, %lo(.LCPI9_0)
-; RV64-NEXT:    vle64.v v12, (a0)
 ; RV64-NEXT:    vmv1r.v v10, v9
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vslideup.vi v8, v10, 2
+; RV64-NEXT:    lui a0, 12304
+; RV64-NEXT:    addiw a0, a0, 512
+; RV64-NEXT:    vmv.s.x v10, a0
+; RV64-NEXT:    vsext.vf8 v12, v10
 ; RV64-NEXT:    vrgather.vv v10, v8, v12
 ; RV64-NEXT:    vmv.v.v v8, v10
 ; RV64-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
index cac159c3e3a250e..4ece90dac18ac07 100644
--- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll
@@ -766,13 +766,14 @@ define void @test_srem_vec(ptr %X) nounwind {
 ; RV64MV-NEXT:    vslide1down.vx v8, v8, a3
 ; RV64MV-NEXT:    vslide1down.vx v8, v8, a2
 ; RV64MV-NEXT:    vslidedown.vi v8, v8, 1
-; RV64MV-NEXT:    lui a1, %hi(.LCPI3_3)
-; RV64MV-NEXT:    addi a1, a1, %lo(.LCPI3_3)
-; RV64MV-NEXT:    vle64.v v10, (a1)
 ; RV64MV-NEXT:    li a1, -1
 ; RV64MV-NEXT:    srli a1, a1, 31
 ; RV64MV-NEXT:    vand.vx v8, v8, a1
-; RV64MV-NEXT:    vmsne.vv v0, v8, v10
+; RV64MV-NEXT:    lui a2, 32
+; RV64MV-NEXT:    addiw a2, a2, 256
+; RV64MV-NEXT:    vmv.s.x v10, a2
+; RV64MV-NEXT:    vsext.vf8 v12, v10
+; RV64MV-NEXT:    vmsne.vv v0, v8, v12
 ; RV64MV-NEXT:    vmv.v.i v8, 0
 ; RV64MV-NEXT:    vmerge.vim v8, v8, -1, v0
 ; RV64MV-NEXT:    vsetivli zero, 1, e64, m2, ta, ma


