[llvm] a8a36ee - [RISCV] Scalarize constant stores of fixed vectors if small enough
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed May 24 07:55:16 PDT 2023
Author: Luke Lau
Date: 2023-05-24T15:55:01+01:00
New Revision: a8a36ee5220e1ac74b06f90adab3dec020920e2a
URL: https://github.com/llvm/llvm-project/commit/a8a36ee5220e1ac74b06f90adab3dec020920e2a
DIFF: https://github.com/llvm/llvm-project/commit/a8a36ee5220e1ac74b06f90adab3dec020920e2a.diff
LOG: [RISCV] Scalarize constant stores of fixed vectors if small enough
For small fixed-length vector constants, we can emit the store as a
sequence of lui/addi/sh/sw instructions, avoiding the cost of building
the vector and the vsetivli toggle, provided the constant isn't too
expensive to materialize.
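For example, in the updated store_constant_v2i8 test below, a store like

  store <2 x i8> <i8 3, i8 6>, ptr %p

previously lowered to

  vsetivli zero, 2, e8, mf8, ta, ma
  vmv.v.i  v8, 3
  vid.v    v9
  li       a1, 3
  vmadd.vx v9, a1, v8
  vse8.v   v9, (a0)

and now becomes a single scalar store of the packed constant 0x0603:

  li a1, 1539
  sh a1, 0(a0)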
This subsumes the optimisation for stores of zeroes in
4dc9a2c5b93682c12d7a80bbe790b14ddb301877
(This is a reapply of 0ca13f9d2701e23af2d000a5d8f48b33fe0878b7)
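For reference, the new combine (see the RISCVISelLowering.cpp hunk below)
packs the vector elements into a single integer of the store's width, with
element i landing at bit offset i * element-size, and only fires if the
access is sufficiently aligned and RISCVMatInt reports a materialization
cost of at most two instructions. A worked example, matching the RV32
output of the updated store_constant_v4i8 test:

  <4 x i8> <i8 3, i8 6, i8 5, i8 1>  ->  0x01050603

  lui  a1, 4176        # a1 = 0x01050000
  addi a1, a1, 1539    # a1 = 0x01050603
  sw   a1, 0(a0)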
Reviewed By: reames
Differential Revision: https://reviews.llvm.org/D151221
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9b9fd6e10bc0e..a7af257e08ad0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -12193,18 +12193,32 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
isPowerOf2_64(MemVT.getSizeInBits()) &&
MemVT.getSizeInBits() <= Subtarget.getXLen();
- // Using vector to store zeros requires e.g.:
- // vsetivli zero, 2, e64, m1, ta, ma
- // vmv.v.i v8, 0
+ // If sufficiently aligned we can scalarize stores of constant vectors of
+ // any power-of-two size up to XLen bits, provided that they aren't too
+ // expensive to materialize.
+ // vsetivli zero, 2, e8, m1, ta, ma
+ // vmv.v.i v8, 4
// vse64.v v8, (a0)
- // If sufficiently aligned, we can use at most one scalar store to zero
- // initialize any power-of-two size up to XLen bits.
+ // ->
+ // li a1, 1028
+ // sh a1, 0(a0)
if (DCI.isBeforeLegalize() && IsScalarizable &&
- ISD::isBuildVectorAllZeros(Val.getNode())) {
- auto NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
- if (allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
+ ISD::isBuildVectorOfConstantSDNodes(Val.getNode())) {
+ // Get the constant vector bits
+ APInt NewC(Val.getValueSizeInBits(), 0);
+ for (unsigned i = 0; i < Val.getNumOperands(); i++) {
+ if (Val.getOperand(i).isUndef())
+ continue;
+ NewC.insertBits(Val.getConstantOperandAPInt(i),
+ i * Val.getScalarValueSizeInBits());
+ }
+ MVT NewVT = MVT::getIntegerVT(MemVT.getSizeInBits());
+
+ if (RISCVMatInt::getIntMatCost(NewC, Subtarget.getXLen(),
+ Subtarget.getFeatureBits(), true) <= 2 &&
+ allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(),
NewVT, *Store->getMemOperand())) {
- auto NewV = DAG.getConstant(0, DL, NewVT);
+ SDValue NewV = DAG.getConstant(NewC, DL, NewVT);
return DAG.getStore(Chain, DL, NewV, Store->getBasePtr(),
Store->getPointerInfo(), Store->getOriginalAlign(),
Store->getMemOperand()->getFlags());
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index fb753f5a1a546..e51950d1f8290 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -63,108 +63,103 @@ define void @buildvec_vid_mpy_imm_v16i8(ptr %x) {
ret void
}
-define void @buildvec_vid_step2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+; Some tests return this struct because the stores end up being scalarized.
+%x4v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
+
+define %x4v4i8 @buildvec_vid_step2_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_step2_add0_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: vse8.v v8, (a1)
-; CHECK-NEXT: vse8.v v8, (a2)
-; CHECK-NEXT: vse8.v v8, (a3)
-; CHECK-NEXT: ret
- store <4 x i8> <i8 0, i8 2, i8 4, i8 6>, ptr %z0
- store <4 x i8> <i8 undef, i8 2, i8 4, i8 6>, ptr %z1
- store <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>, ptr %z2
- store <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>, ptr %z3
- ret void
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: ret
+ %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 2, i8 4, i8 6>, 0
+ %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 2, i8 4, i8 6>, 1
+ %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 4, i8 6>, 2
+ %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 0, i8 undef, i8 undef, i8 6>, 3
+ ret %x4v4i8 %4
}
-define void @buildvec_vid_step2_add1_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_step2_add1_v4i8() {
; CHECK-LABEL: buildvec_vid_step2_add1_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vadd.vi v8, v8, 1
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: vse8.v v8, (a1)
-; CHECK-NEXT: vse8.v v8, (a2)
-; CHECK-NEXT: vse8.v v8, (a3)
-; CHECK-NEXT: ret
- store <4 x i8> <i8 1, i8 3, i8 5, i8 7>, ptr %z0
- store <4 x i8> <i8 undef, i8 3, i8 5, i8 7>, ptr %z1
- store <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>, ptr %z2
- store <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>, ptr %z3
- ret void
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: ret
+ %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 1, i8 3, i8 5, i8 7>, 0
+ %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 3, i8 5, i8 7>, 1
+ %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 5, i8 7>, 2
+ %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 1, i8 undef, i8 undef, i8 7>, 3
+ ret %x4v4i8 %4
}
; FIXME: This could generate vrsub.vi but the (ISD::MUL X, -1) we generate
; while lowering ISD::BUILD_VECTOR is custom-lowered to RISCVISD::MUL_VL before
; being combined.
-define void @buildvec_vid_stepn1_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_stepn1_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn1_add0_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: vse8.v v8, (a1)
-; CHECK-NEXT: vse8.v v8, (a2)
-; CHECK-NEXT: vse8.v v8, (a3)
-; CHECK-NEXT: ret
- store <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>, ptr %z0
- store <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>, ptr %z1
- store <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>, ptr %z2
- store <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>, ptr %z3
- ret void
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: vmv1r.v v11, v8
+; CHECK-NEXT: ret
+ %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 -1, i8 -2, i8 -3>, 0
+ %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 -1, i8 -2, i8 -3>, 1
+ %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 -2, i8 -3>, 2
+ %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 0, i8 undef, i8 undef, i8 -3>, 3
+ ret %x4v4i8 %4
}
-define void @buildvec_vid_stepn2_add0_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define %x4v4i8 @buildvec_vid_stepn2_add0_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn2_add0_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: vse8.v v8, (a1)
-; CHECK-NEXT: vse8.v v8, (a2)
-; CHECK-NEXT: vse8.v v8, (a3)
-; CHECK-NEXT: ret
- store <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>, ptr %z0
- store <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>, ptr %z1
- store <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>, ptr %z2
- store <4 x i8> <i8 0, i8 undef, i8 undef, i8 -6>, ptr %z3
- ret void
+; CHECK-NEXT: vmv.v.i v11, -6
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vmv1r.v v10, v8
+; CHECK-NEXT: ret
+ %1 = insertvalue %x4v4i8 poison, <4 x i8> <i8 0, i8 -2, i8 -4, i8 -6>, 0
+ %2 = insertvalue %x4v4i8 %1, <4 x i8> <i8 undef, i8 -2, i8 -4, i8 -6>, 1
+ %3 = insertvalue %x4v4i8 %2, <4 x i8> <i8 undef, i8 undef, i8 -4, i8 -6>, 2
+ %4 = insertvalue %x4v4i8 %3, <4 x i8> <i8 undef, i8 undef, i8 undef, i8 -6>, 3
+ ret %x4v4i8 %4
}
-define void @buildvec_vid_stepn2_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define <4 x i8> @buildvec_vid_stepn2_add3_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn2_add3_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vadd.vv v8, v8, v8
; CHECK-NEXT: vrsub.vi v8, v8, 3
-; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
- store <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>, ptr %z0
- ret void
+ ret <4 x i8> <i8 3, i8 1, i8 -1, i8 -3>
}
-define void @buildvec_vid_stepn3_add3_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
+define <4 x i8> @buildvec_vid_stepn3_add3_v4i8() {
; CHECK-LABEL: buildvec_vid_stepn3_add3_v4i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: li a1, -3
-; CHECK-NEXT: vmadd.vx v9, a1, v8
-; CHECK-NEXT: vse8.v v9, (a0)
+; CHECK-NEXT: vmv.v.i v9, 3
+; CHECK-NEXT: vid.v v8
+; CHECK-NEXT: li a0, -3
+; CHECK-NEXT: vmadd.vx v8, a0, v9
; CHECK-NEXT: ret
- store <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>, ptr %z0
- ret void
+ ret <4 x i8> <i8 3, i8 0, i8 -3, i8 -6>
}
define void @buildvec_vid_stepn3_addn3_v4i32(ptr %z0, ptr %z1, ptr %z2, ptr %z3) {
@@ -235,43 +230,37 @@ define <4 x i64> @buildvec_vid_step2_add0_v4i64() {
ret <4 x i64> <i64 0, i64 2, i64 4, i64 6>
}
-define void @buildvec_no_vid_v4i8(ptr %z0, ptr %z1, ptr %z2, ptr %z3, ptr %z4, ptr %z5) {
+%x6v4i8 = type {<4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>, <4 x i8>}
+
+define %x6v4i8 @buildvec_no_vid_v4i8() {
; CHECK-LABEL: buildvec_no_vid_v4i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a6, %hi(.LCPI14_0)
-; CHECK-NEXT: addi a6, a6, %lo(.LCPI14_0)
+; CHECK-NEXT: lui a0, %hi(.LCPI14_0)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_0)
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a6)
-; CHECK-NEXT: lui a6, %hi(.LCPI14_1)
-; CHECK-NEXT: addi a6, a6, %lo(.LCPI14_1)
-; CHECK-NEXT: vle8.v v9, (a6)
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: vse8.v v9, (a1)
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: lui a0, %hi(.LCPI14_1)
+; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_1)
+; CHECK-NEXT: vle8.v v9, (a0)
; CHECK-NEXT: li a0, 1
; CHECK-NEXT: slli a0, a0, 11
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vse8.v v8, (a2)
+; CHECK-NEXT: vmv.v.x v10, a0
; CHECK-NEXT: li a0, 2047
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v8, a0
+; CHECK-NEXT: vmv.v.x v11, a0
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
; CHECK-NEXT: lui a0, %hi(.LCPI14_2)
; CHECK-NEXT: addi a0, a0, %lo(.LCPI14_2)
-; CHECK-NEXT: vle8.v v9, (a0)
-; CHECK-NEXT: vse8.v v8, (a3)
-; CHECK-NEXT: vmv.v.i v8, -2
-; CHECK-NEXT: vse8.v v8, (a4)
-; CHECK-NEXT: vse8.v v9, (a5)
-; CHECK-NEXT: ret
- store <4 x i8> <i8 1, i8 3, i8 6, i8 7>, ptr %z0
- store <4 x i8> <i8 undef, i8 2, i8 5, i8 7>, ptr %z1
- store <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>, ptr %z2
- store <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>, ptr %z3
- store <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>, ptr %z4
- store <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>, ptr %z5
- ret void
+; CHECK-NEXT: vle8.v v13, (a0)
+; CHECK-NEXT: vmv.v.i v12, -2
+; CHECK-NEXT: ret
+ %1 = insertvalue %x6v4i8 poison, <4 x i8> <i8 1, i8 3, i8 6, i8 7>, 0
+ %2 = insertvalue %x6v4i8 %1, <4 x i8> <i8 undef, i8 2, i8 5, i8 7>, 1
+ %3 = insertvalue %x6v4i8 %2, <4 x i8> <i8 0, i8 undef, i8 undef, i8 8>, 2
+ %4 = insertvalue %x6v4i8 %3, <4 x i8> <i8 -1, i8 undef, i8 undef, i8 7>, 3
+ %5 = insertvalue %x6v4i8 %4, <4 x i8> <i8 -2, i8 undef, i8 undef, i8 undef>, 4
+ %6 = insertvalue %x6v4i8 %5, <4 x i8> <i8 -1, i8 -2, i8 -4, i8 -5>, 5
+ ret %x6v4i8 %6
}
define void @buildvec_dominant0_v8i16(ptr %x) {
@@ -300,35 +289,30 @@ define void @buildvec_dominant1_v8i16(ptr %x) {
ret void
}
-define void @buildvec_dominant0_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant0_v2i8() {
; CHECK-LABEL: buildvec_dominant0_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: ret
- store <2 x i8> <i8 undef, i8 undef>, ptr %x
- ret void
+ ret <2 x i8> <i8 undef, i8 undef>
}
-define void @buildvec_dominant1_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant1_v2i8() {
; CHECK-LABEL: buildvec_dominant1_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vmv.v.i v8, -1
-; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
- store <2 x i8> <i8 undef, i8 -1>, ptr %x
- ret void
+ ret <2 x i8> <i8 undef, i8 -1>
}
-define void @buildvec_dominant2_v2i8(ptr %x) {
+define <2 x i8> @buildvec_dominant2_v2i8() {
; CHECK-LABEL: buildvec_dominant2_v2i8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
; CHECK-NEXT: vid.v v8
; CHECK-NEXT: vrsub.vi v8, v8, 0
-; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
- store <2 x i8> <i8 0, i8 -1>, ptr %x
- ret void
+ ret <2 x i8> <i8 0, i8 -1>
}
define void @buildvec_dominant0_v2i32(ptr %x) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index 02b6de9e80c3e..1634ead4835ce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -7,17 +7,8 @@
define void @splat_ones_v1i1(ptr %x) {
; CHECK-LABEL: splat_ones_v1i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma
-; CHECK-NEXT: vmset.m v0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmsne.vi v8, v9, 0
-; CHECK-NEXT: vsm.v v8, (a0)
+; CHECK-NEXT: li a1, 1
+; CHECK-NEXT: sb a1, 0(a0)
; CHECK-NEXT: ret
store <1 x i1> <i1 1>, ptr %x
ret void
@@ -83,17 +74,8 @@ define void @splat_v1i1_icmp(ptr %x, i32 signext %y, i32 signext %z) {
define void @splat_ones_v4i1(ptr %x) {
; CHECK-LABEL: splat_ones_v4i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vmset.m v0
-; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.i v9, 0
-; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma
-; CHECK-NEXT: vslideup.vi v9, v8, 0
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vmsne.vi v8, v9, 0
-; CHECK-NEXT: vsm.v v8, (a0)
+; CHECK-NEXT: li a1, 15
+; CHECK-NEXT: sb a1, 0(a0)
; CHECK-NEXT: ret
store <4 x i1> <i1 1, i1 1, i1 1, i1 1>, ptr %x
ret void
@@ -149,9 +131,8 @@ define void @splat_v8i1(ptr %x, i1 %y) {
define void @splat_ones_v16i1(ptr %x) {
; CHECK-LABEL: splat_ones_v16i1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; CHECK-NEXT: vmset.m v8
-; CHECK-NEXT: vsm.v v8, (a0)
+; CHECK-NEXT: li a1, -1
+; CHECK-NEXT: sh a1, 0(a0)
; CHECK-NEXT: ret
store <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %x
ret void
@@ -220,16 +201,6 @@ define void @splat_v32i1(ptr %x, i1 %y) {
}
define void @splat_ones_v64i1(ptr %x) {
-; LMULMAX2-LABEL: splat_ones_v64i1:
-; LMULMAX2: # %bb.0:
-; LMULMAX2-NEXT: addi a1, a0, 4
-; LMULMAX2-NEXT: li a2, 32
-; LMULMAX2-NEXT: vsetvli zero, a2, e8, m2, ta, ma
-; LMULMAX2-NEXT: vmset.m v8
-; LMULMAX2-NEXT: vsm.v v8, (a1)
-; LMULMAX2-NEXT: vsm.v v8, (a0)
-; LMULMAX2-NEXT: ret
-;
; LMULMAX1-RV32-LABEL: splat_ones_v64i1:
; LMULMAX1-RV32: # %bb.0:
; LMULMAX1-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
@@ -245,15 +216,8 @@ define void @splat_ones_v64i1(ptr %x) {
;
; LMULMAX1-RV64-LABEL: splat_ones_v64i1:
; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vmset.m v8
-; LMULMAX1-RV64-NEXT: vsm.v v8, (a0)
-; LMULMAX1-RV64-NEXT: addi a1, a0, 6
-; LMULMAX1-RV64-NEXT: vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT: addi a1, a0, 4
-; LMULMAX1-RV64-NEXT: vsm.v v8, (a1)
-; LMULMAX1-RV64-NEXT: addi a0, a0, 2
-; LMULMAX1-RV64-NEXT: vsm.v v8, (a0)
+; LMULMAX1-RV64-NEXT: li a1, -1
+; LMULMAX1-RV64-NEXT: sd a1, 0(a0)
; LMULMAX1-RV64-NEXT: ret
store <64 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1>, ptr %x
ret void
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index 2e437daa27779..846b1b2752464 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -191,27 +191,27 @@ define void @store_v6i1(ptr %p, <6 x i1> %v) {
define void @store_constant_v2i8(ptr %p) {
; CHECK-LABEL: store_constant_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: li a1, 3
-; CHECK-NEXT: vmadd.vx v9, a1, v8
-; CHECK-NEXT: vse8.v v9, (a0)
+; CHECK-NEXT: li a1, 1539
+; CHECK-NEXT: sh a1, 0(a0)
; CHECK-NEXT: ret
store <2 x i8> <i8 3, i8 6>, ptr %p
ret void
}
define void @store_constant_v2i16(ptr %p) {
-; CHECK-LABEL: store_constant_v2i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: li a1, 3
-; CHECK-NEXT: vmadd.vx v9, a1, v8
-; CHECK-NEXT: vse16.v v9, (a0)
-; CHECK-NEXT: ret
+; RV32-LABEL: store_constant_v2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a1, 96
+; RV32-NEXT: addi a1, a1, 3
+; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_constant_v2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, 96
+; RV64-NEXT: addiw a1, a1, 3
+; RV64-NEXT: sw a1, 0(a0)
+; RV64-NEXT: ret
store <2 x i16> <i16 3, i16 6>, ptr %p
ret void
}
@@ -231,14 +231,19 @@ define void @store_constant_v2i32(ptr %p) {
}
define void @store_constant_v4i8(ptr %p) {
-; CHECK-LABEL: store_constant_v4i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI12_0)
-; CHECK-NEXT: addi a1, a1, %lo(.LCPI12_0)
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vle8.v v8, (a1)
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: ret
+; RV32-LABEL: store_constant_v4i8:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a1, 4176
+; RV32-NEXT: addi a1, a1, 1539
+; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_constant_v4i8:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, 4176
+; RV64-NEXT: addiw a1, a1, 1539
+; RV64-NEXT: sw a1, 0(a0)
+; RV64-NEXT: ret
store <4 x i8> <i8 3, i8 6, i8 5, i8 1>, ptr %p
ret void
}
@@ -270,12 +275,19 @@ define void @store_constant_v4i32(ptr %p) {
}
define void @store_id_v4i8(ptr %p) {
-; CHECK-LABEL: store_id_v4i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT: vid.v v8
-; CHECK-NEXT: vse8.v v8, (a0)
-; CHECK-NEXT: ret
+; RV32-LABEL: store_id_v4i8:
+; RV32: # %bb.0:
+; RV32-NEXT: lui a1, 12320
+; RV32-NEXT: addi a1, a1, 256
+; RV32-NEXT: sw a1, 0(a0)
+; RV32-NEXT: ret
+;
+; RV64-LABEL: store_id_v4i8:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, 12320
+; RV64-NEXT: addiw a1, a1, 256
+; RV64-NEXT: sw a1, 0(a0)
+; RV64-NEXT: ret
store <4 x i8> <i8 0, i8 1, i8 2, i8 3>, ptr %p
ret void
}
@@ -297,9 +309,8 @@ define void @store_constant_v2i8_align1(ptr %p) {
define void @store_constant_splat_v2i8(ptr %p) {
; CHECK-LABEL: store_constant_splat_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: li a1, 771
+; CHECK-NEXT: sh a1, 0(a0)
; CHECK-NEXT: ret
store <2 x i8> <i8 3, i8 3>, ptr %p
ret void
@@ -308,9 +319,8 @@ define void @store_constant_splat_v2i8(ptr %p) {
define void @store_constant_undef_v2i8(ptr %p) {
; CHECK-LABEL: store_constant_undef_v2i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.i v8, 3
-; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: li a1, 768
+; CHECK-NEXT: sh a1, 0(a0)
; CHECK-NEXT: ret
store <2 x i8> <i8 undef, i8 3>, ptr %p
ret void