[llvm] a8a36ee - [RISCV] Scalarize constant stores of fixed vectors if small enough

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Wed May 24 07:55:16 PDT 2023


Author: Luke Lau
Date: 2023-05-24T15:55:01+01:00
New Revision: a8a36ee5220e1ac74b06f90adab3dec020920e2a

URL: https://github.com/llvm/llvm-project/commit/a8a36ee5220e1ac74b06f90adab3dec020920e2a
DIFF: https://github.com/llvm/llvm-project/commit/a8a36ee5220e1ac74b06f90adab3dec020920e2a.diff

LOG: [RISCV] Scalarize constant stores of fixed vectors if small enough

Stores of small fixed-length vector constants can instead be emitted as
a short scalar sequence (e.g. lui/addi followed by sh/sw), avoiding the
cost of building the vector and the vsetivli toggle, provided the
constant isn't too expensive to materialize.

This subsumes the optimisation for stores of zeroes in
4dc9a2c5b93682c12d7a80bbe790b14ddb301877.

(This is a reapply of 0ca13f9d2701e23af2d000a5d8f48b33fe0878b7)

Reviewed By: reames

Differential Revision: https://reviews.llvm.org/D151221
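
To make the effect concrete, here is a small standalone C++ sketch (an
illustration, not part of the patch) that packs the elements of the
<2 x i8> <i8 3, i8 6> constant from store_constant_v2i8 below the same way
the combine does: element i lands at bit offset i * element-width, matching
RISC-V's little-endian memory layout. The packed value 1539 (0x0603) is
exactly what the updated check lines "li a1, 1539; sh a1, 0(a0)" expect.

  #include <cstdint>
  #include <cstdio>

  int main() {
    // Elements of the constant <2 x i8> <i8 3, i8 6>.
    uint8_t Elts[2] = {3, 6};

    // Pack element i at bit offset i * 8, mirroring what the combine does
    // with APInt::insertBits. Illustration only.
    uint16_t Packed = 0;
    for (unsigned i = 0; i < 2; ++i)
      Packed |= (uint16_t)Elts[i] << (i * 8);

    // Prints 1539 (0x0603): one li + one sh replaces the vector store.
    printf("%u (0x%04x)\n", (unsigned)Packed, (unsigned)Packed);
  }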

Added: 
    

Modified: 
    llvm/lib/Target/RISCV/RISCVISelLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9b9fd6e10bc0e..a7af257e08ad0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12193,18 +12193,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
         isPowerOf2_64(MemVT.getSizeInBits()) &&
         MemVT.getSizeInBits() <= Subtarget.getXLen();
 
-    // Using vector to store zeros requires e.g.:
-    //   vsetivli   zero, 2, e64, m1, ta, ma
-    //   vmv.v.i    v8, 0
+    // If sufficiently aligned we can scalarize stores of constant vectors of
+    // any power-of-two size up to XLen bits, provided that they aren't too
+    // expensive to materialize.
+    //   vsetivli   zero, 2, e8, m1, ta, ma
+    //   vmv.v.i    v8, 4
     //   vse64.v    v8, (a0)
-    // If sufficiently aligned, we can use at most one scalar store to zero
-    // initialize any power-of-two size up to XLen bits.
+    // ->
+    //   li     a1, 1028
+    //   sh     a1, 0(a0)
     if (DCI.isBeforeLegalize() && IsScalarizable &&
-        ISD::isBuildVectorAllZeros(Val.getNode())) {
-      auto NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
-      if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+        ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
+      // Get the constant vector bits
+      APInt NewC(Val.getValueSizeInBits(), 0);
+      for (unsigned i = 0; i < Val.getNumOperands(); i++) {
+        if (Val.getOperand(i).isUndef())
+          continue;
+        NewC.insertBits(Val.getConstantOperandAPInt(i),
+                        i * Val.getScalarValueSizeInBits());
+      }
+      MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
+
+      if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
+                                     Subtarget.getFeatureBits(), true) <= 2 &&
+          allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
                                          NewVT, *Store->getMemOperand())) {
-        auto NewV = DAG.getConstant(0, DL, NewVT);
+        SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
         return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
                             Store->getPointerInfo(), Store->getOriginalAlign(),
                             Store->getMemOperand()->getFlags());

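A note on the loop above (a hedged sketch, again not part of the patch):
undef elements are skipped, so their bits stay zero in the packed constant,
and the combine only fires when RISCVMatInt::getIntMatCost reports a
materialization cost of at most 2 for that constant. The standalone snippet
below, assuming it is built against LLVM's ADT/Support libraries, uses
llvm::APInt the same way the loop does to show why <2 x i8> <i8 undef, i8 3>
in store_constant_undef_v2i8 below becomes the scalar 768 (0x0300), i.e.
"li a1, 768; sh a1, 0(a0)".

  #include "llvm/ADT/APInt.h"
  #include <cstdio>

  int main() {
    // Operands of the <2 x i8> build_vector <i8 undef, i8 3>; the flag
    // marks the undef lane. Illustration only.
    struct { uint64_t Val; bool IsUndef; } Ops[2] = {{0, true}, {3, false}};

    const unsigned EltBits = 8;
    llvm::APInt NewC(16, 0); // MemVT is 16 bits wide for a <2 x i8> store.
    for (unsigned i = 0; i < 2; ++i) {
      if (Ops[i].IsUndef)
        continue; // undef lane: leave its bits as zero
      NewC.insertBits(llvm::APInt(EltBits, Ops[i].Val), i * EltBits);
    }

    // Prints 768 (0x0300), matching store_constant_undef_v2i8 below.
    printf("%llu\n", (unsigned long long)NewC.getZExtValue());
  }
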
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index fb753f5a1a546..e51950d1f8290 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -63,108 +63,103 @@ define void @buildvec_vid_mpy_imm_v16i8(ptr %x) {
   ret void
 }
 
-define void @buildvec_vid_step2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+; Some tests return this struct because the stores end up being scalarized.
+%x4v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
+
+define %x4v4i8 @buildvec_vid_step2_add0_v4i8() {
 ; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
-  store <4 x i8> <i8 0, i8 2, i8 4, i8 6>, ptr %z0
-  store <4 x i8> <i8 undef, i8 2, i8 4, i8 6>, ptr %z1
-  store <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>, ptr %z2
-  store <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>, ptr %z3
-  ret void
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 2, i8 4, i8 6>, 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 2, i8 4, i8 6>, 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>, 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>, 3
+  ret %x4v4i8 %4
 }
 
-define void @buildvec_vid_step2_add1_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_step2_add1_v4i8() {
 ; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vadd.vi v8, v8, 1
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
-  store <4 x i8> <i8 1, i8 3, i8 5, i8 7>, ptr %z0
-  store <4 x i8> <i8 undef, i8 3, i8 5, i8 7>, ptr %z1
-  store <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>, ptr %z2
-  store <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>, ptr %z3
-  ret void
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 1, i8 3, i8 5, i8 7>, 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 3, i8 5, i8 7>, 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>, 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>, 3
+  ret %x4v4i8 %4
 }
 
 ; FIXME: This could generate vrsub.vi but the (ISD::MUL X, -1) we generate
 ; while lowering ISD::BUILD_VECTOR is custom-lowered to RISCVISD::MUL_VL before
 ; being combined.
-define void @buildvec_vid_stepn1_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_stepn1_add0_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
-  store <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>, ptr %z0
-  store <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>, ptr %z1
-  store <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>, ptr %z2
-  store <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>, ptr %z3
-  ret void
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    vmv1r.v v11, v8
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>, 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>, 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>, 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>, 3
+  ret %x4v4i8 %4
 }
 
-define void @buildvec_vid_stepn2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_stepn2_add0_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a2)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    ret
-  store <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>, ptr %z0
-  store <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>, ptr %z1
-  store <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>, ptr %z2
-  store <4 x i8> <i8 0, i8 undef, i8 undef, i8 -6>, ptr %z3
-  ret void
+; CHECK-NEXT:    vmv.v.i v11, -6
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vmv1r.v v10, v8
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>, 0
+  %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>, 1
+  %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>, 2
+  %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 undef, i8 undef, i8 undef, i8 -6>, 3
+  ret %x4v4i8 %4
 }
 
-define void @buildvec_vid_stepn2_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vadd.vv v8, v8, v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
-  store <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>, ptr %z0
-  ret void
+  ret <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>
 }
 
-define void @buildvec_vid_stepn3_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
 ; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, -3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse8.v v9, (a0)
+; CHECK-NEXT:    vmv.v.i v9, 3
+; CHECK-NEXT:    vid.v v8
+; CHECK-NEXT:    li a0, -3
+; CHECK-NEXT:    vmadd.vx v8, a0, v9
 ; CHECK-NEXT:    ret
-  store <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>, ptr %z0
-  ret void
+  ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
 }
 
 define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
@@ -235,43 +230,37 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
   ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
 }
 
-define void @buildvec_no_vid_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5) {
+%x6v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
+
+define %x6v4i8 @buildvec_no_vid_v4i8() {
 ; CHECK-LABEL: buildvec_no_vid_v4i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a6, %hi(.LCPI14_0)
-; CHECK-NEXT:    addi a6, a6, %lo(.LCPI14_0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI14_0)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_0)
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a6)
-; CHECK-NEXT:    lui a6, %hi(.LCPI14_1)
-; CHECK-NEXT:    addi a6, a6, %lo(.LCPI14_1)
-; CHECK-NEXT:    vle8.v v9, (a6)
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    vse8.v v9, (a1)
+; CHECK-NEXT:    vle8.v v8, (a0)
+; CHECK-NEXT:    lui a0, %hi(.LCPI14_1)
+; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_1)
+; CHECK-NEXT:    vle8.v v9, (a0)
 ; CHECK-NEXT:    li a0, 1
 ; CHECK-NEXT:    slli a0, a0, 11
 ; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vse8.v v8, (a2)
+; CHECK-NEXT:    vmv.v.x v10, a0
 ; CHECK-NEXT:    li a0, 2047
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.x v8, a0
+; CHECK-NEXT:    vmv.v.x v11, a0
 ; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
 ; CHECK-NEXT:    lui a0, %hi(.LCPI14_2)
 ; CHECK-NEXT:    addi a0, a0, %lo(.LCPI14_2)
-; CHECK-NEXT:    vle8.v v9, (a0)
-; CHECK-NEXT:    vse8.v v8, (a3)
-; CHECK-NEXT:    vmv.v.i v8, -2
-; CHECK-NEXT:    vse8.v v8, (a4)
-; CHECK-NEXT:    vse8.v v9, (a5)
-; CHECK-NEXT:    ret
-  store <4 x i8> <i8 1, i8 3, i8 6, i8 7>, ptr %z0
-  store <4 x i8> <i8 undef, i8 2, i8 5, i8 7>, ptr %z1
-  store <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>, ptr %z2
-  store <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>, ptr %z3
-  store <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>, ptr %z4
-  store <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>, ptr %z5
-  ret void
+; CHECK-NEXT:    vle8.v v13, (a0)
+; CHECK-NEXT:    vmv.v.i v12, -2
+; CHECK-NEXT:    ret
+  %1 = insertvalue %x6v4i8 poison, <4 x i8> <i8 1, i8 3, i8 6, i8 7>, 0
+  %2 = insertvalue %x6v4i8 %1, <4 x i8> <i8 undef, i8 2, i8 5, i8 7>, 1
+  %3 = insertvalue %x6v4i8 %2, <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>, 2
+  %4 = insertvalue %x6v4i8 %3, <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>, 3
+  %5 = insertvalue %x6v4i8 %4, <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>, 4
+  %6 = insertvalue %x6v4i8 %5, <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>, 5
+  ret %x6v4i8 %6
 }
 
 define void @buildvec_dominant0_v8i16(ptr %x) {
@@ -300,35 +289,30 @@ define void @buildvec_dominant1_v8i16(ptr %x) {
   ret void
 }
 
-define void @buildvec_dominant0_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant0_v2i8() {
 ; CHECK-LABEL: buildvec_dominant0_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    ret
-  store <2 x i8> <i8 undef, i8 undef>, ptr %x
-  ret void
+  ret <2 x i8> <i8 undef, i8 undef>
 }
 
-define void @buildvec_dominant1_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant1_v2i8() {
 ; CHECK-LABEL: buildvec_dominant1_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, -1
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
-  store <2 x i8> <i8 undef, i8 -1>, ptr %x
-  ret void
+  ret <2 x i8> <i8 undef, i8 -1>
 }
 
-define void @buildvec_dominant2_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant2_v2i8() {
 ; CHECK-LABEL: buildvec_dominant2_v2i8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
 ; CHECK-NEXT:    vid.v v8
 ; CHECK-NEXT:    vrsub.vi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a0)
 ; CHECK-NEXT:    ret
-  store <2 x i8> <i8 0, i8 -1>, ptr %x
-  ret void
+  ret <2 x i8> <i8 0, i8 -1>
 }
 
 define void @buildvec_dominant0_v2i32(ptr %x) {

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index 02b6de9e80c3e..1634ead4835ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -7,17 +7,8 @@
 define void @splat_ones_v1i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v1i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetivli zero, 1, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v9, 0
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    li a1, 1
+; CHECK-NEXT:    sb a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <1 x i1> <i1 1>, ptr %x
   ret void
@@ -83,17 +74,8 @@ define void @splat_v1i1_icmp(ptr %x, i32 signext %y, i32 signext %z) {
 define void @splat_ones_v4i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v4i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vmset.m v0
-; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmv.v.i v9, 0
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v9, v8, 0
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vmsne.vi v8, v9, 0
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    li a1, 15
+; CHECK-NEXT:    sb a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <4 x i1> <i1 1, i1 1, i1 1, i1 1>, ptr %x
   ret void
@@ -149,9 +131,8 @@ define void @splat_v8i1(ptr %x, i1 %y) {
 define void @splat_ones_v16i1(ptr %x) {
 ; CHECK-LABEL: splat_ones_v16i1:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT:    vmset.m v8
-; CHECK-NEXT:    vsm.v v8, (a0)
+; CHECK-NEXT:    li a1, -1
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %x
   ret void
@@ -220,16 +201,6 @@ define void @splat_v32i1(ptr %x, i1 %y) {
 }
 
 define void @splat_ones_v64i1(ptr %x) {
-; LMULMAX2-LABEL: splat_ones_v64i1:
-; LMULMAX2:       # %bb.0:
-; LMULMAX2-NEXT:    addi a1, a0, 4
-; LMULMAX2-NEXT:    li a2, 32
-; LMULMAX2-NEXT:    vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT:    vmset.m v8
-; LMULMAX2-NEXT:    vsm.v v8, (a1)
-; LMULMAX2-NEXT:    vsm.v v8, (a0)
-; LMULMAX2-NEXT:    ret
-;
 ; LMULMAX1-RV32-LABEL: splat_ones_v64i1:
 ; LMULMAX1-RV32:       # %bb.0:
 ; LMULMAX1-RV32-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
@@ -245,15 +216,8 @@ define void @splat_ones_v64i1(ptr %x) {
 ;
 ; LMULMAX1-RV64-LABEL: splat_ones_v64i1:
 ; LMULMAX1-RV64:       # %bb.0:
-; LMULMAX1-RV64-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT:    vmset.m v8
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 6
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    addi a1, a0, 4
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT:    addi a0, a0, 2
-; LMULMAX1-RV64-NEXT:    vsm.v v8, (a0)
+; LMULMAX1-RV64-NEXT:    li a1, -1
+; LMULMAX1-RV64-NEXT:    sd a1, 0(a0)
 ; LMULMAX1-RV64-NEXT:    ret
   store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %x
   ret void

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index 2e437daa27779..846b1b2752464 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -191,27 +191,27 @@ define void @store_v6i1(ptr %p, <6 x i1> %v) {
 define void @store_constant_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, 3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse8.v v9, (a0)
+; CHECK-NEXT:    li a1, 1539
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> <i8 3, i8 6>, ptr %p
   ret void
 }
 
 define void @store_constant_v2i16(ptr %p) {
-; CHECK-LABEL: store_constant_v2i16:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    li a1, 3
-; CHECK-NEXT:    vmadd.vx v9, a1, v8
-; CHECK-NEXT:    vse16.v v9, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v2i16:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 96
+; RV32-NEXT:    addi a1, a1, 3
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v2i16:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 96
+; RV64-NEXT:    addiw a1, a1, 3
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <2 x i16> <i16 3, i16 6>, ptr %p
   ret void
 }
@@ -231,14 +231,19 @@ define void @store_constant_v2i32(ptr %p) {
 }
 
 define void @store_constant_v4i8(ptr %p) {
-; CHECK-LABEL: store_constant_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a1, %hi(.LCPI12_0)
-; CHECK-NEXT:    addi a1, a1, %lo(.LCPI12_0)
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a1)
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_constant_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 4176
+; RV32-NEXT:    addi a1, a1, 1539
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_constant_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 4176
+; RV64-NEXT:    addiw a1, a1, 1539
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> <i8 3, i8 6, i8 5, i8 1>, ptr %p
   ret void
 }
@@ -270,12 +275,19 @@ define void @store_constant_v4i32(ptr %p) {
 }
 
 define void @store_id_v4i8(ptr %p) {
-; CHECK-LABEL: store_id_v4i8:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vid.v v8
-; CHECK-NEXT:    vse8.v v8, (a0)
-; CHECK-NEXT:    ret
+; RV32-LABEL: store_id_v4i8:
+; RV32:       # %bb.0:
+; RV32-NEXT:    lui a1, 12320
+; RV32-NEXT:    addi a1, a1, 256
+; RV32-NEXT:    sw a1, 0(a0)
+; RV32-NEXT:    ret
+;
+; RV64-LABEL: store_id_v4i8:
+; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 12320
+; RV64-NEXT:    addiw a1, a1, 256
+; RV64-NEXT:    sw a1, 0(a0)
+; RV64-NEXT:    ret
   store <4 x i8> <i8 0, i8 1, i8 2, i8 3>, ptr %p
   ret void
 }
@@ -297,9 +309,8 @@ define void @store_constant_v2i8_align1(ptr %p) {
 define void @store_constant_splat_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_splat_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    li a1, 771
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> <i8 3, i8 3>, ptr %p
   ret void
@@ -308,9 +319,8 @@ define void @store_constant_splat_v2i8(ptr %p) {
 define void @store_constant_undef_v2i8(ptr %p) {
 ; CHECK-LABEL: store_constant_undef_v2i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT:    vmv.v.i v8, 3
-; CHECK-NEXT:    vse8.v v8, (a0)
+; CHECK-NEXT:    li a1, 768
+; CHECK-NEXT:    sh a1, 0(a0)
 ; CHECK-NEXT:    ret
   store <2 x i8> <i8 undef, i8 3>, ptr %p
   ret void

