[llvm] 7ed01a4 - [RISCV] Pad v4i1/v2i1/v1i1 stores with 0s to make a full byte.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Mon Apr 19 11:05:25 PDT 2021
Author: Craig Topper
Date: 2021-04-19T11:05:18-07:00
New Revision: 7ed01a420a2deb609b6b111d968a4dc673c68f19
URL: https://github.com/llvm/llvm-project/commit/7ed01a420a2deb609b6b111d968a4dc673c68f19
DIFF: https://github.com/llvm/llvm-project/commit/7ed01a420a2deb609b6b111d968a4dc673c68f19.diff
LOG: [RISCV] Pad v4i1/v2i1/v1i1 stores with 0s to make a full byte.
As noted in the FIXME, there's a sort of agreement that any
extra bits stored will be 0.
The generated code is pretty terrible. I was really hoping we
could use a tail undisturbed trick, but tail undisturbed no
longer applies to masked destinations in the current draft
spec.
Fingers crossed that it isn't common to do this. I doubt IR
from clang or the vectorizer would ever create this kind of store.
Reviewed By: frasercrmck
Differential Revision: https://reviews.llvm.org/D100618
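For context, here is a minimal LLVM IR sketch (modeled on the load_store_v2i1
test updated below) of the kind of sub-byte mask store this change affects;
the function and value names are illustrative:

  define void @store_v2i1(<2 x i1> %m, <2 x i1>* %p) {
    ; <2 x i1> occupies less than a byte, so the value is now padded with
    ; zeros (inserted into a zero v8i1) before the vse1.v store.
    store <2 x i1> %m, <2 x i1>* %p
    ret void
  }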
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 01f363fb6bed..cd10cb6eb489 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3717,10 +3717,16 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
auto *Store = cast<StoreSDNode>(Op);
SDLoc DL(Op);
- MVT VT = Store->getValue().getSimpleValueType();
+ SDValue StoreVal = Store->getValue();
+ MVT VT = StoreVal.getSimpleValueType();
- // FIXME: We probably need to zero any extra bits in a byte for mask stores.
- // This is tricky to do.
+ // If the size is less than a byte, we need to pad the unused bits with 0s.
+ if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
+ VT = MVT::v8i1;
+ StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ DAG.getConstant(0, DL, VT), StoreVal,
+ DAG.getIntPtrConstant(0, DL));
+ }
MVT ContainerVT = getContainerForFixedLengthVector(VT);
@@ -3728,7 +3734,7 @@ RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
SDValue NewValue =
- convertToScalableVector(ContainerVT, Store->getValue(), DAG, Subtarget);
+ convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
return DAG.getMemIntrinsicNode(
RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
{Store->getChain(), NewValue, Store->getBasePtr(), VL},
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
index 481c88e91f14..436b5bce9ea2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll
@@ -567,8 +567,16 @@ define void @truncstore_v2i8_v2i1(<2 x i8> %x, <2 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
; CHECK-NEXT: vand.vi v25, v8, 1
-; CHECK-NEXT: vmsne.vi v26, v25, 0
-; CHECK-NEXT: vse1.v v26, (a0)
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%y = trunc <2 x i8> %x to <2 x i1>
store <2 x i1> %y, <2 x i1>* %z
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
index df520196d7e2..8a4b5c88d1c9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll
@@ -353,16 +353,32 @@ define void @extract_v2i1_v64i1_0(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX2: # %bb.0:
; LMULMAX2-NEXT: addi a2, zero, 32
; LMULMAX2-NEXT: vsetvli a2, a2, e8,m2,ta,mu
-; LMULMAX2-NEXT: vle1.v v25, (a0)
+; LMULMAX2-NEXT: vle1.v v0, (a0)
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmv.v.i v25, 0
+; LMULMAX2-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmv.v.i v26, 0
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; LMULMAX2-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: extract_v2i1_v64i1_0:
; LMULMAX1: # %bb.0:
; LMULMAX1-NEXT: vsetivli a2, 16, e8,m1,ta,mu
-; LMULMAX1-NEXT: vle1.v v25, (a0)
+; LMULMAX1-NEXT: vle1.v v0, (a0)
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmsne.vi v25, v26, 0
; LMULMAX1-NEXT: vse1.v v25, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
@@ -382,6 +398,14 @@ define void @extract_v2i1_v64i1_2(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu
; LMULMAX2-NEXT: vslidedown.vi v26, v26, 2
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmsne.vi v0, v26, 0
+; LMULMAX2-NEXT: vmv.v.i v25, 0
+; LMULMAX2-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmv.v.i v26, 0
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; LMULMAX2-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: ret
@@ -394,8 +418,16 @@ define void @extract_v2i1_v64i1_2(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 2
-; LMULMAX1-NEXT: vmsne.vi v26, v25, 0
-; LMULMAX1-NEXT: vse1.v v26, (a1)
+; LMULMAX1-NEXT: vmsne.vi v0, v25, 0
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmsne.vi v25, v26, 0
+; LMULMAX1-NEXT: vse1.v v25, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 2)
@@ -415,6 +447,14 @@ define void @extract_v2i1_v64i1_42(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m2,ta,mu
; LMULMAX2-NEXT: vslidedown.vi v26, v26, 10
; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmsne.vi v0, v26, 0
+; LMULMAX2-NEXT: vmv.v.i v25, 0
+; LMULMAX2-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT: vmv.v.i v26, 0
+; LMULMAX2-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; LMULMAX2-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX2-NEXT: vsetivli a0, 8, e8,m1,ta,mu
; LMULMAX2-NEXT: vmsne.vi v25, v26, 0
; LMULMAX2-NEXT: vse1.v v25, (a1)
; LMULMAX2-NEXT: ret
@@ -428,8 +468,16 @@ define void @extract_v2i1_v64i1_42(<64 x i1>* %x, <2 x i1>* %y) {
; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,ta,mu
; LMULMAX1-NEXT: vslidedown.vi v25, v25, 10
-; LMULMAX1-NEXT: vmsne.vi v26, v25, 0
-; LMULMAX1-NEXT: vse1.v v26, (a1)
+; LMULMAX1-NEXT: vmsne.vi v0, v25, 0
+; LMULMAX1-NEXT: vmv.v.i v25, 0
+; LMULMAX1-NEXT: vmerge.vim v25, v25, 1, v0
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmv.v.i v26, 0
+; LMULMAX1-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; LMULMAX1-NEXT: vslideup.vi v26, v25, 0
+; LMULMAX1-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT: vmsne.vi v25, v26, 0
+; LMULMAX1-NEXT: vse1.v v25, (a1)
; LMULMAX1-NEXT: ret
%a = load <64 x i1>, <64 x i1>* %x
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.v64i1(<64 x i1> %a, i64 42)
@@ -441,7 +489,15 @@ define void @extract_v2i1_nxv2i1_0(<vscale x 2 x i1> %x, <2 x i1>* %y) {
; CHECK-LABEL: extract_v2i1_nxv2i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 0)
store <2 x i1> %c, <2 x i1>* %y
@@ -457,8 +513,16 @@ define void @extract_v2i1_nxv2i1_2(<vscale x 2 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vsetivli a1, 2, e8,mf4,ta,mu
; CHECK-NEXT: vslidedown.vi v25, v25, 2
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
-; CHECK-NEXT: vmsne.vi v26, v25, 0
-; CHECK-NEXT: vse1.v v26, (a0)
+; CHECK-NEXT: vmsne.vi v0, v25, 0
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv2i1(<vscale x 2 x i1> %x, i64 2)
store <2 x i1> %c, <2 x i1>* %y
@@ -469,7 +533,15 @@ define void @extract_v2i1_nxv64i1_0(<vscale x 64 x i1> %x, <2 x i1>* %y) {
; CHECK-LABEL: extract_v2i1_nxv64i1_0:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v0, (a0)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 0)
store <2 x i1> %c, <2 x i1>* %y
@@ -485,7 +557,15 @@ define void @extract_v2i1_nxv64i1_2(<vscale x 64 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vsetivli a1, 2, e8,m8,ta,mu
; CHECK-NEXT: vslidedown.vi v8, v8, 2
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
-; CHECK-NEXT: vmsne.vi v25, v8, 0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 2)
@@ -503,7 +583,15 @@ define void @extract_v2i1_nxv64i1_42(<vscale x 64 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vsetivli a2, 2, e8,m8,ta,mu
; CHECK-NEXT: vslidedown.vx v8, v8, a1
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
-; CHECK-NEXT: vmsne.vi v25, v8, 0
+; CHECK-NEXT: vmsne.vi v0, v8, 0
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv64i1(<vscale x 64 x i1> %x, i64 42)
@@ -520,7 +608,15 @@ define void @extract_v2i1_nxv32i1_26(<vscale x 32 x i1> %x, <2 x i1>* %y) {
; CHECK-NEXT: vsetivli a1, 2, e8,m4,ta,mu
; CHECK-NEXT: vslidedown.vi v28, v28, 26
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
-; CHECK-NEXT: vmsne.vi v25, v28, 0
+; CHECK-NEXT: vmsne.vi v0, v28, 0
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
%c = call <2 x i1> @llvm.experimental.vector.extract.v2i1.nxv32i1(<vscale x 32 x i1> %x, i64 26)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
index 6c14ed5f738f..61592783e625 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll
@@ -42,9 +42,17 @@ define void @fcmp_une_vv_v4f32(<4 x float>* %x, <4 x float>* %y, <4 x i1>* %z) {
; CHECK-NEXT: vsetivli a3, 4, e32,m1,ta,mu
; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vle32.v v26, (a1)
-; CHECK-NEXT: vmfne.vv v27, v25, v26
+; CHECK-NEXT: vmfne.vv v0, v25, v26
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v27, (a2)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = load <4 x float>, <4 x float>* %y
@@ -59,9 +67,17 @@ define void @fcmp_une_vv_v4f32_nonans(<4 x float>* %x, <4 x float>* %y, <4 x i1>
; CHECK-NEXT: vsetivli a3, 4, e32,m1,ta,mu
; CHECK-NEXT: vle32.v v25, (a0)
; CHECK-NEXT: vle32.v v26, (a1)
-; CHECK-NEXT: vmfne.vv v27, v25, v26
+; CHECK-NEXT: vmfne.vv v0, v25, v26
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v27, (a2)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = load <4 x float>, <4 x float>* %y
@@ -76,9 +92,17 @@ define void @fcmp_ogt_vv_v2f64(<2 x double>* %x, <2 x double>* %y, <2 x i1>* %z)
; CHECK-NEXT: vsetivli a3, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
; CHECK-NEXT: vle64.v v26, (a1)
-; CHECK-NEXT: vmflt.vv v27, v26, v25
+; CHECK-NEXT: vmflt.vv v0, v26, v25
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v27, (a2)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = load <2 x double>, <2 x double>* %y
@@ -93,9 +117,17 @@ define void @fcmp_ogt_vv_v2f64_nonans(<2 x double>* %x, <2 x double>* %y, <2 x i
; CHECK-NEXT: vsetivli a3, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
; CHECK-NEXT: vle64.v v26, (a1)
-; CHECK-NEXT: vmflt.vv v27, v26, v25
+; CHECK-NEXT: vmflt.vv v0, v26, v25
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v27, (a2)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = load <2 x double>, <2 x double>* %y
@@ -178,8 +210,16 @@ define void @fcmp_ole_vv_v4f64(<4 x double>* %x, <4 x double>* %y, <4 x i1>* %z)
; CHECK-NEXT: vsetivli a3, 4, e64,m2,ta,mu
; CHECK-NEXT: vle64.v v26, (a0)
; CHECK-NEXT: vle64.v v28, (a1)
-; CHECK-NEXT: vmfle.vv v25, v26, v28
+; CHECK-NEXT: vmfle.vv v0, v26, v28
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <4 x double>, <4 x double>* %x
@@ -195,8 +235,16 @@ define void @fcmp_ole_vv_v4f64_nonans(<4 x double>* %x, <4 x double>* %y, <4 x i
; CHECK-NEXT: vsetivli a3, 4, e64,m2,ta,mu
; CHECK-NEXT: vle64.v v26, (a0)
; CHECK-NEXT: vle64.v v28, (a1)
-; CHECK-NEXT: vmfle.vv v25, v26, v28
+; CHECK-NEXT: vmfle.vv v0, v26, v28
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <4 x double>, <4 x double>* %x
@@ -433,7 +481,15 @@ define void @fcmp_ord_vv_v4f16(<4 x half>* %x, <4 x half>* %y, <4 x i1>* %z) {
; CHECK-NEXT: vmfeq.vv v27, v25, v25
; CHECK-NEXT: vmfeq.vv v25, v26, v26
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v25, v25, v27
+; CHECK-NEXT: vmand.mm v0, v25, v27
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -452,7 +508,15 @@ define void @fcmp_uno_vv_v4f16(<2 x half>* %x, <2 x half>* %y, <2 x i1>* %z) {
; CHECK-NEXT: vmfne.vv v27, v25, v25
; CHECK-NEXT: vmfne.vv v25, v26, v26
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v25, v25, v27
+; CHECK-NEXT: vmor.mm v0, v25, v27
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a2)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x
@@ -501,9 +565,17 @@ define void @fcmp_une_vf_v4f32(<4 x float>* %x, float %y, <4 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 4, e32,m1,ta,mu
; CHECK-NEXT: vle32.v v25, (a0)
-; CHECK-NEXT: vmfne.vf v26, v25, fa0
+; CHECK-NEXT: vmfne.vf v0, v25, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v26, (a1)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = insertelement <4 x float> undef, float %y, i32 0
@@ -518,9 +590,17 @@ define void @fcmp_une_vf_v4f32_nonans(<4 x float>* %x, float %y, <4 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 4, e32,m1,ta,mu
; CHECK-NEXT: vle32.v v25, (a0)
-; CHECK-NEXT: vmfne.vf v26, v25, fa0
+; CHECK-NEXT: vmfne.vf v0, v25, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v26, (a1)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = insertelement <4 x float> undef, float %y, i32 0
@@ -535,9 +615,17 @@ define void @fcmp_ogt_vf_v2f64(<2 x double>* %x, double %y, <2 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
-; CHECK-NEXT: vmfgt.vf v26, v25, fa0
+; CHECK-NEXT: vmfgt.vf v0, v25, fa0
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v26, (a1)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = insertelement <2 x double> undef, double %y, i32 0
@@ -552,9 +640,17 @@ define void @fcmp_ogt_vf_v2f64_nonans(<2 x double>* %x, double %y, <2 x i1>* %z)
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
-; CHECK-NEXT: vmfgt.vf v26, v25, fa0
+; CHECK-NEXT: vmfgt.vf v0, v25, fa0
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v26, (a1)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = insertelement <2 x double> undef, double %y, i32 0
@@ -637,8 +733,16 @@ define void @fcmp_ole_vf_v4f64(<4 x double>* %x, double %y, <4 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 4, e64,m2,ta,mu
; CHECK-NEXT: vle64.v v26, (a0)
-; CHECK-NEXT: vmfle.vf v25, v26, fa0
+; CHECK-NEXT: vmfle.vf v0, v26, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x double>, <4 x double>* %x
@@ -654,8 +758,16 @@ define void @fcmp_ole_vf_v4f64_nonans(<4 x double>* %x, double %y, <4 x i1>* %z)
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 4, e64,m2,ta,mu
; CHECK-NEXT: vle64.v v26, (a0)
-; CHECK-NEXT: vmfle.vf v25, v26, fa0
+; CHECK-NEXT: vmfle.vf v0, v26, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x double>, <4 x double>* %x
@@ -893,7 +1005,15 @@ define void @fcmp_ord_vf_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
; CHECK-NEXT: vmfeq.vf v27, v26, fa0
; CHECK-NEXT: vmfeq.vv v26, v25, v25
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v25, v26, v27
+; CHECK-NEXT: vmand.mm v0, v26, v27
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -913,7 +1033,15 @@ define void @fcmp_uno_vf_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
; CHECK-NEXT: vmfne.vf v27, v26, fa0
; CHECK-NEXT: vmfne.vv v26, v25, v25
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v25, v26, v27
+; CHECK-NEXT: vmor.mm v0, v26, v27
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x
@@ -963,9 +1091,17 @@ define void @fcmp_une_fv_v4f32(<4 x float>* %x, float %y, <4 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 4, e32,m1,ta,mu
; CHECK-NEXT: vle32.v v25, (a0)
-; CHECK-NEXT: vmfne.vf v26, v25, fa0
+; CHECK-NEXT: vmfne.vf v0, v25, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v26, (a1)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = insertelement <4 x float> undef, float %y, i32 0
@@ -980,9 +1116,17 @@ define void @fcmp_une_fv_v4f32_nonans(<4 x float>* %x, float %y, <4 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 4, e32,m1,ta,mu
; CHECK-NEXT: vle32.v v25, (a0)
-; CHECK-NEXT: vmfne.vf v26, v25, fa0
+; CHECK-NEXT: vmfne.vf v0, v25, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v26, (a1)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x float>, <4 x float>* %x
%b = insertelement <4 x float> undef, float %y, i32 0
@@ -997,9 +1141,17 @@ define void @fcmp_ogt_fv_v2f64(<2 x double>* %x, double %y, <2 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
-; CHECK-NEXT: vmflt.vf v26, v25, fa0
+; CHECK-NEXT: vmflt.vf v0, v25, fa0
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v26, (a1)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = insertelement <2 x double> undef, double %y, i32 0
@@ -1014,9 +1166,17 @@ define void @fcmp_ogt_fv_v2f64_nonans(<2 x double>* %x, double %y, <2 x i1>* %z)
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 2, e64,m1,ta,mu
; CHECK-NEXT: vle64.v v25, (a0)
-; CHECK-NEXT: vmflt.vf v26, v25, fa0
+; CHECK-NEXT: vmflt.vf v0, v25, fa0
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vse1.v v26, (a1)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
+; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x double>, <2 x double>* %x
%b = insertelement <2 x double> undef, double %y, i32 0
@@ -1099,8 +1259,16 @@ define void @fcmp_ole_fv_v4f64(<4 x double>* %x, double %y, <4 x i1>* %z) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 4, e64,m2,ta,mu
; CHECK-NEXT: vle64.v v26, (a0)
-; CHECK-NEXT: vmfge.vf v25, v26, fa0
+; CHECK-NEXT: vmfge.vf v0, v26, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x double>, <4 x double>* %x
@@ -1116,8 +1284,16 @@ define void @fcmp_ole_fv_v4f64_nonans(<4 x double>* %x, double %y, <4 x i1>* %z)
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 4, e64,m2,ta,mu
; CHECK-NEXT: vle64.v v26, (a0)
-; CHECK-NEXT: vmfge.vf v25, v26, fa0
+; CHECK-NEXT: vmfge.vf v0, v26, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x double>, <4 x double>* %x
@@ -1355,7 +1531,15 @@ define void @fcmp_ord_fv_v4f16(<4 x half>* %x, half %y, <4 x i1>* %z) {
; CHECK-NEXT: vmfeq.vv v27, v25, v25
; CHECK-NEXT: vmfeq.vf v25, v26, fa0
; CHECK-NEXT: vsetivli a0, 4, e8,m1,ta,mu
-; CHECK-NEXT: vmand.mm v25, v25, v27
+; CHECK-NEXT: vmand.mm v0, v25, v27
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x half>, <4 x half>* %x
@@ -1375,7 +1559,15 @@ define void @fcmp_uno_fv_v4f16(<2 x half>* %x, half %y, <2 x i1>* %z) {
; CHECK-NEXT: vmfne.vv v27, v25, v25
; CHECK-NEXT: vmfne.vf v25, v26, fa0
; CHECK-NEXT: vsetivli a0, 2, e8,m1,ta,mu
-; CHECK-NEXT: vmor.mm v25, v25, v27
+; CHECK-NEXT: vmor.mm v0, v25, v27
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x half>, <2 x half>* %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
index 86aaaacb1ff7..e769f67fd5f2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-load-store.ll
@@ -8,7 +8,15 @@ define void @load_store_v1i1(<1 x i1>* %x, <1 x i1>* %y) {
; CHECK-LABEL: load_store_v1i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 1, e8,m1,ta,mu
-; CHECK-NEXT: vle1.v v25, (a0)
+; CHECK-NEXT: vle1.v v0, (a0)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 1, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <1 x i1>, <1 x i1>* %x
@@ -20,7 +28,15 @@ define void @load_store_v2i1(<2 x i1>* %x, <2 x i1>* %y) {
; CHECK-LABEL: load_store_v2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 2, e8,m1,ta,mu
-; CHECK-NEXT: vle1.v v25, (a0)
+; CHECK-NEXT: vle1.v v0, (a0)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <2 x i1>, <2 x i1>* %x
@@ -32,7 +48,15 @@ define void @load_store_v4i1(<4 x i1>* %x, <4 x i1>* %y) {
; CHECK-LABEL: load_store_v4i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a2, 4, e8,m1,ta,mu
-; CHECK-NEXT: vle1.v v25, (a0)
+; CHECK-NEXT: vle1.v v0, (a0)
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a0, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a0, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a1)
; CHECK-NEXT: ret
%a = load <4 x i1>, <4 x i1>* %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
index 10a40f862429..7fe24900e2f8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll
@@ -8,7 +8,15 @@ define void @splat_ones_v1i1(<1 x i1>* %x) {
; CHECK-LABEL: splat_ones_v1i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 1, e8,m1,ta,mu
-; CHECK-NEXT: vmset.m v25
+; CHECK-NEXT: vmset.m v0
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 1, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
store <1 x i1> <i1 1>, <1 x i1>* %x
@@ -19,7 +27,15 @@ define void @splat_zeros_v2i1(<2 x i1>* %x) {
; CHECK-LABEL: splat_zeros_v2i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 2, e8,m1,ta,mu
-; CHECK-NEXT: vmclr.m v25
+; CHECK-NEXT: vmclr.m v0
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 2, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
store <2 x i1> zeroinitializer, <2 x i1>* %x
@@ -30,7 +46,15 @@ define void @splat_ones_v4i1(<4 x i1>* %x) {
; CHECK-LABEL: splat_ones_v4i1:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli a1, 4, e8,m1,ta,mu
-; CHECK-NEXT: vmset.m v25
+; CHECK-NEXT: vmset.m v0
+; CHECK-NEXT: vmv.v.i v25, 0
+; CHECK-NEXT: vmerge.vim v25, v25, 1, v0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmv.v.i v26, 0
+; CHECK-NEXT: vsetivli a1, 4, e8,m1,tu,mu
+; CHECK-NEXT: vslideup.vi v26, v25, 0
+; CHECK-NEXT: vsetivli a1, 8, e8,m1,ta,mu
+; CHECK-NEXT: vmsne.vi v25, v26, 0
; CHECK-NEXT: vse1.v v25, (a0)
; CHECK-NEXT: ret
store <4 x i1> <i1 1, i1 1, i1 1, i1 1>, <4 x i1>* %x