[llvm] 2a1716d - [LegalizeTypes][VP] Widen load/store of fixed length vectors to VP ops
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Mon Jun 12 02:21:17 PDT 2023
Author: Luke Lau
Date: 2023-06-12T10:21:04+01:00
New Revision: 2a1716dec57e8b3dd668df17ecbedfc77a4112e5
URL: https://github.com/llvm/llvm-project/commit/2a1716dec57e8b3dd668df17ecbedfc77a4112e5
DIFF: https://github.com/llvm/llvm-project/commit/2a1716dec57e8b3dd668df17ecbedfc77a4112e5.diff
LOG: [LegalizeTypes][VP] Widen load/store of fixed length vectors to VP ops
If we have a load/store with an illegal fixed-length vector result type that
needs to be widened, e.g. `x:v6i32 = load p`, then instead of just widening it
to `x:v8i32 = load p`, we can widen it to the equivalent VP operation and set
the EVL to the exact number of elements needed:
`x:v8i32 = vp_load p, mask=true, evl=6`
provided that the target supports vp_load/vp_store on the widened type.
Scalable vectors are already widened this way where possible, so this
largely reuses the same logic.
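For reference, a minimal sketch of what the widened load path boils down to,
simplified from the hunk below. The trailing memory-operand arguments of
getLoadVP are assumed to follow the pre-existing scalable-vector code rather
than being copied from this excerpt, which cuts the call short:

```cpp
// Widen an illegal load to a VP load whose EVL is the original element
// count, e.g. v6i32 -> v8i32 with evl = 6.
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);    // v8i32
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                  WideVT.getVectorElementCount()); // v8i1
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);             // all-true
// getElementCount handles both fixed (plain constant) and scalable
// (vscale * N) element counts, replacing the old getVScale-based EVL, so the
// same code now serves fixed and scalable widening.
SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
                                  LdVT.getVectorElementCount());   // 6
const auto *MMO = LD->getMemOperand();
SDValue NewLoad =
    DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
                  MMO->getPointerInfo(), MMO->getAlign(), MMO->getFlags(),
                  MMO->getAAInfo());
```

The store side is symmetric: the value is widened, an all-true mask is built
for the wide type, and the EVL is the original store type's element count.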
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D148713
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index e2e1a837aeb59..a1a150d5234b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -5185,30 +5185,6 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
return SDValue();
}
- SDValue Result;
- SmallVector<SDValue, 16> LdChain; // Chain for the series of load
- if (ExtType != ISD::NON_EXTLOAD)
- Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
- else
- Result = GenWidenVectorLoads(LdChain, LD);
-
- if (Result) {
- // If we generate a single load, we can use that for the chain. Otherwise,
- // build a factor node to remember the multiple loads are independent and
- // chain to that.
- SDValue NewChain;
- if (LdChain.size() == 1)
- NewChain = LdChain[0];
- else
- NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
-
- // Modified the chain - switch anything that used the old chain to use
- // the new one.
- ReplaceValueWith(SDValue(N, 1), NewChain);
-
- return Result;
- }
-
// Generate a vector-predicated load if it is custom/legal on the target. To
// avoid possible recursion, only do this if the widened mask type is legal.
// FIXME: Not all targets may support EVL in VP_LOAD. These will have been
@@ -5218,15 +5194,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), LdVT);
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WideVT.getVectorElementCount());
- if (ExtType == ISD::NON_EXTLOAD && WideVT.isScalableVector() &&
+ if (ExtType == ISD::NON_EXTLOAD &&
TLI.isOperationLegalOrCustom(ISD::VP_LOAD, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
SDLoc DL(N);
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
- MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
- unsigned NumVTElts = LdVT.getVectorMinNumElements();
- SDValue EVL =
- DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+ LdVT.getVectorElementCount());
const auto *MMO = LD->getMemOperand();
SDValue NewLoad =
DAG.getLoadVP(WideVT, DL, LD->getChain(), LD->getBasePtr(), Mask, EVL,
@@ -5240,6 +5214,30 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
return NewLoad;
}
+ SDValue Result;
+ SmallVector<SDValue, 16> LdChain; // Chain for the series of load
+ if (ExtType != ISD::NON_EXTLOAD)
+ Result = GenWidenVectorExtLoads(LdChain, LD, ExtType);
+ else
+ Result = GenWidenVectorLoads(LdChain, LD);
+
+ if (Result) {
+ // If we generate a single load, we can use that for the chain. Otherwise,
+ // build a factor node to remember the multiple loads are independent and
+ // chain to that.
+ SDValue NewChain;
+ if (LdChain.size() == 1)
+ NewChain = LdChain[0];
+ else
+ NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, LdChain);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return Result;
+ }
+
report_fatal_error("Unable to widen vector load");
}
@@ -6272,14 +6270,6 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
if (ST->isTruncatingStore())
return TLI.scalarizeVectorStore(ST, DAG);
- SmallVector<SDValue, 16> StChain;
- if (GenWidenVectorStores(StChain, ST)) {
- if (StChain.size() == 1)
- return StChain[0];
-
- return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
- }
-
// Generate a vector-predicated store if it is custom/legal on the target.
// To avoid possible recursion, only do this if the widened mask type is
// legal.
@@ -6291,23 +6281,29 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) {
EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StVT);
EVT WideMaskVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
WideVT.getVectorElementCount());
- if (WideVT.isScalableVector() &&
- TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
+
+ if (TLI.isOperationLegalOrCustom(ISD::VP_STORE, WideVT) &&
TLI.isTypeLegal(WideMaskVT)) {
// Widen the value.
SDLoc DL(N);
StVal = GetWidenedVector(StVal);
SDValue Mask = DAG.getAllOnesConstant(DL, WideMaskVT);
- MVT EVLVT = TLI.getVPExplicitVectorLengthTy();
- unsigned NumVTElts = StVT.getVectorMinNumElements();
- SDValue EVL =
- DAG.getVScale(DL, EVLVT, APInt(EVLVT.getScalarSizeInBits(), NumVTElts));
+ SDValue EVL = DAG.getElementCount(DL, TLI.getVPExplicitVectorLengthTy(),
+ StVT.getVectorElementCount());
return DAG.getStoreVP(ST->getChain(), DL, StVal, ST->getBasePtr(),
DAG.getUNDEF(ST->getBasePtr().getValueType()), Mask,
- EVL, StVal.getValueType(), ST->getMemOperand(),
+ EVL, StVT, ST->getMemOperand(),
ST->getAddressingMode());
}
+ SmallVector<SDValue, 16> StChain;
+ if (GenWidenVectorStores(StChain, ST)) {
+ if (StChain.size() == 1)
+ return StChain[0];
+
+ return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other, StChain);
+ }
+
report_fatal_error("Unable to widen vector store");
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
index f11a7b44edaab..b93b35ac61664 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs.ll
@@ -37,33 +37,16 @@ define void @abs_v8i16(ptr %x) {
declare <8 x i16> @llvm.abs.v8i16(<8 x i16>, i1)
define void @abs_v6i16(ptr %x) {
-; LMULMAX1-RV32-LABEL: abs_v6i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vrsub.vi v9, v8, 0
-; LMULMAX1-RV32-NEXT: vmax.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: abs_v6i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vrsub.vi v9, v8, 0
-; LMULMAX1-RV64-NEXT: vmax.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: abs_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0
+; CHECK-NEXT: vmax.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = call <6 x i16> @llvm.abs.v6i16(<6 x i16> %a, i1 false)
store <6 x i16> %b, ptr %x
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
index 16c7611a84430..1d34be990dd25 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll
@@ -220,8 +220,8 @@ define double @extractelt_v4f64(ptr %x) nounwind {
define i64 @extractelt_v3i64(ptr %x) nounwind {
; RV32-LABEL: extractelt_v3i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma
+; RV32-NEXT: vle64.v v8, (a0)
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
; RV32-NEXT: vslidedown.vi v10, v8, 4
; RV32-NEXT: vmv.x.s a0, v10
@@ -231,7 +231,7 @@ define i64 @extractelt_v3i64(ptr %x) nounwind {
;
; RV64-LABEL: extractelt_v3i64:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vi v8, v8, 2
@@ -485,8 +485,9 @@ define double @extractelt_v4f64_idx(ptr %x, i32 zeroext %idx) nounwind {
define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind {
; RV32-LABEL: extractelt_v3i64_idx:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
+; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: add a1, a1, a1
; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma
@@ -499,8 +500,9 @@ define i64 @extractelt_v3i64_idx(ptr %x, i32 zeroext %idx) nounwind {
;
; RV64-LABEL: extractelt_v3i64_idx:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma
; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; RV64-NEXT: vadd.vv v8, v8, v8
; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
; RV64-NEXT: vslidedown.vx v8, v8, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
index 8059f5b5c5545..d6c8a14808274 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll
@@ -25,33 +25,16 @@ define void @fadd_v8f16(ptr %x, ptr %y) {
}
define void @fadd_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: fadd_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vfadd.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fadd_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfadd.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fadd_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfadd.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fadd <6 x half> %a, %b
@@ -108,33 +91,16 @@ define void @fsub_v8f16(ptr %x, ptr %y) {
}
define void @fsub_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: fsub_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vfsub.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fsub_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfsub.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fsub_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfsub.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fsub <6 x half> %a, %b
@@ -191,33 +157,16 @@ define void @fmul_v8f16(ptr %x, ptr %y) {
}
define void @fmul_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: fmul_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vfmul.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fmul_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfmul.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fmul_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmul.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fmul <6 x half> %a, %b
@@ -274,33 +223,16 @@ define void @fdiv_v8f16(ptr %x, ptr %y) {
}
define void @fdiv_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: fdiv_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vfdiv.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fdiv_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfdiv.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fdiv_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfdiv.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fdiv <6 x half> %a, %b
@@ -355,31 +287,15 @@ define void @fneg_v8f16(ptr %x) {
}
define void @fneg_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: fneg_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfneg.v v8, v8
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fneg_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfneg.v v8, v8
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fneg_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfneg.v v8, v8
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = fneg <6 x half> %a
store <6 x half> %b, ptr %x
@@ -430,31 +346,15 @@ define void @fabs_v8f16(ptr %x) {
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
define void @fabs_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: fabs_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfabs.v v8, v8
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fabs_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfabs.v v8, v8
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fabs_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfabs.v v8, v8
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.fabs.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -510,33 +410,16 @@ define void @copysign_v8f16(ptr %x, ptr %y) {
declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>)
define void @copysign_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: copysign_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: copysign_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: copysign_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfsgnj.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = call <6 x half> @llvm.copysign.v6f16(<6 x half> %a, <6 x half> %b)
@@ -596,31 +479,15 @@ define void @copysign_vf_v8f16(ptr %x, half %y) {
}
define void @copysign_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: copysign_vf_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfsgnj.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: copysign_vf_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfsgnj.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: copysign_vf_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfsgnj.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -679,33 +546,16 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) {
}
define void @copysign_neg_v6f16(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: copysign_neg_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vfsgnjn.vv v8, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: copysign_neg_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: copysign_neg_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfsgnjn.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = fneg <6 x half> %b
@@ -769,36 +619,17 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) {
declare <4 x half> @llvm.copysign.v4f16(<4 x half>, <4 x half>)
define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) {
-; LMULMAX1-RV32-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vle32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: vfncvt.f.f.w v10, v8
-; LMULMAX1-RV32-NEXT: vfsgnjn.vv v8, v9, v10
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 4
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: copysign_neg_trunc_v3f16_v3f32:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV64-NEXT: vle32.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfncvt.f.f.w v10, v9
-; LMULMAX1-RV64-NEXT: vfsgnjn.vv v8, v8, v10
-; LMULMAX1-RV64-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV64-NEXT: addi a1, a0, 4
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; LMULMAX1-RV64-NEXT: vse16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: copysign_neg_trunc_v3f16_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NEXT: vle32.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vfncvt.f.f.w v10, v8
+; CHECK-NEXT: vfsgnjn.vv v8, v9, v10
+; CHECK-NEXT: vsetivli zero, 3, e16, mf2, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <3 x half>, ptr %x
%b = load <3 x float>, ptr %y
%c = fneg <3 x float> %b
@@ -845,31 +676,15 @@ define void @sqrt_v8f16(ptr %x) {
declare <8 x half> @llvm.sqrt.v8f16(<8 x half>)
define void @sqrt_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: sqrt_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfsqrt.v v8, v8
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: sqrt_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfsqrt.v v8, v8
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: sqrt_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfsqrt.v v8, v8
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.sqrt.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -927,35 +742,17 @@ define void @fma_v8f16(ptr %x, ptr %y, ptr %z) {
declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
define void @fma_v6f16(ptr %x, ptr %y, ptr %z) {
-; LMULMAX1-RV32-LABEL: fma_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v10, (a2)
-; LMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v10, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fma_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fma_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmacc.vv v10, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
@@ -1023,35 +820,17 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) {
}
define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) {
-; LMULMAX1-RV32-LABEL: fmsub_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v10, (a2)
-; LMULMAX1-RV32-NEXT: vfmsac.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v10, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fmsub_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fmsub_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmsac.vv v10, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
@@ -1858,35 +1637,19 @@ define void @fadd_vf_v8f16(ptr %x, half %y) {
}
define void @fadd_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fadd_vf_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfadd.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fadd_vf_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfadd.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
- %a = load <6 x half>, ptr %x
- %b = insertelement <6 x half> poison, half %y, i32 0
- %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
- %d = fadd <6 x half> %a, %c
+; CHECK-LABEL: fadd_vf_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x half>, ptr %x
+ %b = insertelement <6 x half> poison, half %y, i32 0
+ %c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
+ %d = fadd <6 x half> %a, %c
store <6 x half> %d, ptr %x
ret void
}
@@ -1940,31 +1703,15 @@ define void @fadd_fv_v8f16(ptr %x, half %y) {
}
define void @fadd_fv_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fadd_fv_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfadd.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fadd_fv_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfadd.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fadd_fv_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfadd.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2022,31 +1769,15 @@ define void @fsub_vf_v8f16(ptr %x, half %y) {
}
define void @fsub_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fsub_vf_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfsub.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fsub_vf_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfsub.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fsub_vf_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfsub.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2104,31 +1835,15 @@ define void @fsub_fv_v8f16(ptr %x, half %y) {
}
define void @fsub_fv_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fsub_fv_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfrsub.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fsub_fv_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfrsub.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fsub_fv_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfrsub.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2186,31 +1901,15 @@ define void @fmul_vf_v8f16(ptr %x, half %y) {
}
define void @fmul_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fmul_vf_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfmul.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fmul_vf_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fmul_vf_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2268,31 +1967,15 @@ define void @fmul_fv_v8f16(ptr %x, half %y) {
}
define void @fmul_fv_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fmul_fv_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfmul.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fmul_fv_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfmul.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fmul_fv_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmul.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2350,31 +2033,15 @@ define void @fdiv_vf_v8f16(ptr %x, half %y) {
}
define void @fdiv_vf_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fdiv_vf_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfdiv.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fdiv_vf_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfdiv.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fdiv_vf_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfdiv.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2432,31 +2099,15 @@ define void @fdiv_fv_v8f16(ptr %x, half %y) {
}
define void @fdiv_fv_v6f16(ptr %x, half %y) {
-; LMULMAX1-RV32-LABEL: fdiv_fv_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vfrdiv.vf v8, v8, fa0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fdiv_fv_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vfrdiv.vf v8, v8, fa0
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fdiv_fv_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfrdiv.vf v8, v8, fa0
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = insertelement <6 x half> poison, half %y, i32 0
%c = shufflevector <6 x half> %b, <6 x half> poison, <6 x i32> zeroinitializer
@@ -2516,33 +2167,16 @@ define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) {
}
define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) {
-; LMULMAX1-RV32-LABEL: fma_vf_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vfmacc.vf v9, fa0, v8
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fma_vf_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fma_vf_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmacc.vf v9, fa0, v8
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = insertelement <6 x half> poison, half %z, i32 0
@@ -2607,33 +2241,16 @@ define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) {
}
define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) {
-; LMULMAX1-RV32-LABEL: fma_fv_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vfmacc.vf v9, fa0, v8
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fma_fv_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfmacc.vf v9, fa0, v8
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fma_fv_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmacc.vf v9, fa0, v8
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = insertelement <6 x half> poison, half %z, i32 0
@@ -2699,33 +2316,16 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) {
}
define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) {
-; LMULMAX1-RV32-LABEL: fmsub_vf_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vfmsac.vf v9, fa0, v8
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v9, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v9, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fmsub_vf_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vfmsac.vf v9, fa0, v8
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v9, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v9, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fmsub_vf_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmsac.vf v9, fa0, v8
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = insertelement <6 x half> poison, half %z, i32 0
@@ -2837,45 +2437,22 @@ define void @trunc_v8f16(ptr %x) {
declare <8 x half> @llvm.trunc.v8f16(<8 x half>)
define void @trunc_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: trunc_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI116_0)
-; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
-; LMULMAX1-RV32-NEXT: vfabs.v v9, v8
-; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5
-; LMULMAX1-RV32-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: trunc_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI116_0)
-; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
-; LMULMAX1-RV64-NEXT: vfabs.v v9, v8
-; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5
-; LMULMAX1-RV64-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
-; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: trunc_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI116_0)
+; CHECK-NEXT: flh fa5, %lo(.LCPI116_0)(a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: vfcvt.rtz.x.f.v v9, v8, v0.t
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.trunc.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -2952,49 +2529,24 @@ define void @ceil_v8f16(ptr %x) {
declare <8 x half> @llvm.ceil.v8f16(<8 x half>)
define void @ceil_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: ceil_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI120_0)
-; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI120_0)(a1)
-; LMULMAX1-RV32-NEXT: vfabs.v v9, v8
-; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5
-; LMULMAX1-RV32-NEXT: fsrmi a1, 3
-; LMULMAX1-RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV32-NEXT: fsrm a1
-; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: ceil_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI120_0)
-; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI120_0)(a1)
-; LMULMAX1-RV64-NEXT: vfabs.v v9, v8
-; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5
-; LMULMAX1-RV64-NEXT: fsrmi a1, 3
-; LMULMAX1-RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV64-NEXT: fsrm a1
-; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: ceil_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI120_0)
+; CHECK-NEXT: flh fa5, %lo(.LCPI120_0)(a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: fsrmi a1, 3
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.ceil.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -3075,49 +2627,24 @@ define void @floor_v8f16(ptr %x) {
declare <8 x half> @llvm.floor.v8f16(<8 x half>)
define void @floor_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: floor_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI124_0)
-; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI124_0)(a1)
-; LMULMAX1-RV32-NEXT: vfabs.v v9, v8
-; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5
-; LMULMAX1-RV32-NEXT: fsrmi a1, 2
-; LMULMAX1-RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV32-NEXT: fsrm a1
-; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: floor_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI124_0)
-; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI124_0)(a1)
-; LMULMAX1-RV64-NEXT: vfabs.v v9, v8
-; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5
-; LMULMAX1-RV64-NEXT: fsrmi a1, 2
-; LMULMAX1-RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV64-NEXT: fsrm a1
-; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: floor_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI124_0)
+; CHECK-NEXT: flh fa5, %lo(.LCPI124_0)(a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: fsrmi a1, 2
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.floor.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -3198,49 +2725,24 @@ define void @round_v8f16(ptr %x) {
declare <8 x half> @llvm.round.v8f16(<8 x half>)
define void @round_v6f16(ptr %x) {
-; LMULMAX1-RV32-LABEL: round_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: lui a1, %hi(.LCPI128_0)
-; LMULMAX1-RV32-NEXT: flh fa5, %lo(.LCPI128_0)(a1)
-; LMULMAX1-RV32-NEXT: vfabs.v v9, v8
-; LMULMAX1-RV32-NEXT: vmflt.vf v0, v9, fa5
-; LMULMAX1-RV32-NEXT: fsrmi a1, 4
-; LMULMAX1-RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV32-NEXT: fsrm a1
-; LMULMAX1-RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV32-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: round_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: lui a1, %hi(.LCPI128_0)
-; LMULMAX1-RV64-NEXT: flh fa5, %lo(.LCPI128_0)(a1)
-; LMULMAX1-RV64-NEXT: vfabs.v v9, v8
-; LMULMAX1-RV64-NEXT: vmflt.vf v0, v9, fa5
-; LMULMAX1-RV64-NEXT: fsrmi a1, 4
-; LMULMAX1-RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t
-; LMULMAX1-RV64-NEXT: fsrm a1
-; LMULMAX1-RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t
-; LMULMAX1-RV64-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; LMULMAX1-RV64-NEXT: vfsgnj.vv v8, v9, v8, v0.t
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: round_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI128_0)
+; CHECK-NEXT: flh fa5, %lo(.LCPI128_0)(a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfabs.v v9, v8
+; CHECK-NEXT: vmflt.vf v0, v9, fa5
+; CHECK-NEXT: fsrmi a1, 4
+; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t
+; CHECK-NEXT: fsrm a1
+; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu
+; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = call <6 x half> @llvm.round.v6f16(<6 x half> %a)
store <6 x half> %b, ptr %x
@@ -3454,35 +2956,17 @@ define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
declare <8 x half> @llvm.fmuladd.v8f16(<8 x half>, <8 x half>, <8 x half>)
define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
-; LMULMAX1-RV32-LABEL: fmuladd_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v10, (a2)
-; LMULMAX1-RV32-NEXT: vfmacc.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v10, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fmuladd_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT: vfmacc.vv v10, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fmuladd_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmacc.vv v10, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
@@ -3550,35 +3034,17 @@ define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) {
}
define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) {
-; LMULMAX1-RV32-LABEL: fmsub_fmuladd_v6f16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV32-NEXT: vle16.v v10, (a2)
-; LMULMAX1-RV32-NEXT: vfmsac.vv v10, v8, v9
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v10, 2
-; LMULMAX1-RV32-NEXT: addi a1, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vse16.v v10, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: fmsub_fmuladd_v6f16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vle16.v v8, (a0)
-; LMULMAX1-RV64-NEXT: vle16.v v9, (a1)
-; LMULMAX1-RV64-NEXT: vle16.v v10, (a2)
-; LMULMAX1-RV64-NEXT: vfmsac.vv v10, v8, v9
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vse64.v v10, (a0)
-; LMULMAX1-RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV64-NEXT: vslidedown.vi v8, v10, 2
-; LMULMAX1-RV64-NEXT: addi a0, a0, 8
-; LMULMAX1-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: fmsub_fmuladd_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vfmsac.vv v10, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v10, (a0)
+; CHECK-NEXT: ret
%a = load <6 x half>, ptr %x
%b = load <6 x half>, ptr %y
%c = load <6 x half>, ptr %z
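
The v6f16 checks above now load and store all six elements with a single vle16.v/vse16.v under VL=6, rather than splitting the tail into separate 32-bit and 64-bit pieces. As a rough IR-level analogue only (the actual transform operates on SelectionDAG nodes, not IR), this corresponds to the VP load/store intrinsics with an all-true mask and an EVL of 6; the function name below is made up for illustration and is not part of this patch:

declare <8 x half> @llvm.vp.load.v8f16.p0(ptr, <8 x i1>, i32)
declare void @llvm.vp.store.v8f16.p0(<8 x half>, ptr, <8 x i1>, i32)

define void @copy_v6f16_sketch(ptr %x, ptr %y) {
  ; Read 6 of the 8 widened lanes; the two lanes past the EVL are poison.
  %v = call <8 x half> @llvm.vp.load.v8f16.p0(ptr %x, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 6)
  ; Write back only the 6 live lanes.
  call void @llvm.vp.store.v8f16.p0(<8 x half> %v, ptr %y, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 6)
  ret void
}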
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
index 88a5dbb1386a9..bb39feeb1d067 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp2i.ll
@@ -79,57 +79,15 @@ define <2 x i1> @fp2ui_v2f32_v2i1(<2 x float> %x) {
}
define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) {
-; LMULMAX8RV32-LABEL: fp2si_v3f32_v3i32:
-; LMULMAX8RV32: # %bb.0:
-; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX8RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
-; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX8RV32-NEXT: addi a0, a1, 8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX8RV32-NEXT: ret
-;
-; LMULMAX8RV64-LABEL: fp2si_v3f32_v3i32:
-; LMULMAX8RV64: # %bb.0:
-; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX8RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
-; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX8RV64-NEXT: addi a0, a1, 8
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX8RV64-NEXT: vse64.v v8, (a1)
-; LMULMAX8RV64-NEXT: ret
-;
-; LMULMAX1RV32-LABEL: fp2si_v3f32_v3i32:
-; LMULMAX1RV32: # %bb.0:
-; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX1RV32-NEXT: vfcvt.rtz.x.f.v v8, v8
-; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1RV32-NEXT: addi a0, a1, 8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1RV32-NEXT: ret
-;
-; LMULMAX1RV64-LABEL: fp2si_v3f32_v3i32:
-; LMULMAX1RV64: # %bb.0:
-; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX1RV64-NEXT: vfcvt.rtz.x.f.v v8, v8
-; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1RV64-NEXT: addi a0, a1, 8
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1RV64-NEXT: vse64.v v8, (a1)
-; LMULMAX1RV64-NEXT: ret
+; CHECK-LABEL: fp2si_v3f32_v3i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.rtz.x.f.v v8, v8
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a1)
+; CHECK-NEXT: ret
%a = load <3 x float>, ptr %x
%d = fptosi <3 x float> %a to <3 x i32>
store <3 x i32> %d, ptr %y
@@ -137,57 +95,15 @@ define void @fp2si_v3f32_v3i32(ptr %x, ptr %y) {
}
define void @fp2ui_v3f32_v3i32(ptr %x, ptr %y) {
-; LMULMAX8RV32-LABEL: fp2ui_v3f32_v3i32:
-; LMULMAX8RV32: # %bb.0:
-; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX8RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX8RV32-NEXT: addi a0, a1, 8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX8RV32-NEXT: ret
-;
-; LMULMAX8RV64-LABEL: fp2ui_v3f32_v3i32:
-; LMULMAX8RV64: # %bb.0:
-; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX8RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX8RV64-NEXT: addi a0, a1, 8
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX8RV64-NEXT: vse64.v v8, (a1)
-; LMULMAX8RV64-NEXT: ret
-;
-; LMULMAX1RV32-LABEL: fp2ui_v3f32_v3i32:
-; LMULMAX1RV32: # %bb.0:
-; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX1RV32-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1RV32-NEXT: addi a0, a1, 8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1RV32-NEXT: ret
-;
-; LMULMAX1RV64-LABEL: fp2ui_v3f32_v3i32:
-; LMULMAX1RV64: # %bb.0:
-; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX1RV64-NEXT: vfcvt.rtz.xu.f.v v8, v8
-; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1RV64-NEXT: addi a0, a1, 8
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1RV64-NEXT: vse64.v v8, (a1)
-; LMULMAX1RV64-NEXT: ret
+; CHECK-LABEL: fp2ui_v3f32_v3i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.rtz.xu.f.v v8, v8
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a1)
+; CHECK-NEXT: ret
%a = load <3 x float>, ptr %x
%d = fptoui <3 x float> %a to <3 x i32>
store <3 x i32> %d, ptr %y
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
index b60f9405a760f..b3cda0a4ac342 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-i2fp.ll
@@ -85,57 +85,15 @@ define <2 x float> @ui2fp_v2i1_v2f32(<2 x i1> %x) {
}
define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) {
-; LMULMAX8RV32-LABEL: si2fp_v3i32_v3f32:
-; LMULMAX8RV32: # %bb.0:
-; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX8RV32-NEXT: vfcvt.f.x.v v8, v8
-; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX8RV32-NEXT: addi a0, a1, 8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX8RV32-NEXT: ret
-;
-; LMULMAX8RV64-LABEL: si2fp_v3i32_v3f32:
-; LMULMAX8RV64: # %bb.0:
-; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX8RV64-NEXT: vfcvt.f.x.v v8, v8
-; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX8RV64-NEXT: addi a0, a1, 8
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX8RV64-NEXT: vse64.v v8, (a1)
-; LMULMAX8RV64-NEXT: ret
-;
-; LMULMAX1RV32-LABEL: si2fp_v3i32_v3f32:
-; LMULMAX1RV32: # %bb.0:
-; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX1RV32-NEXT: vfcvt.f.x.v v8, v8
-; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1RV32-NEXT: addi a0, a1, 8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1RV32-NEXT: ret
-;
-; LMULMAX1RV64-LABEL: si2fp_v3i32_v3f32:
-; LMULMAX1RV64: # %bb.0:
-; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX1RV64-NEXT: vfcvt.f.x.v v8, v8
-; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1RV64-NEXT: addi a0, a1, 8
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1RV64-NEXT: vse64.v v8, (a1)
-; LMULMAX1RV64-NEXT: ret
+; CHECK-LABEL: si2fp_v3i32_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.f.x.v v8, v8
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a1)
+; CHECK-NEXT: ret
%a = load <3 x i32>, ptr %x
%d = sitofp <3 x i32> %a to <3 x float>
store <3 x float> %d, ptr %y
@@ -143,57 +101,15 @@ define void @si2fp_v3i32_v3f32(ptr %x, ptr %y) {
}
define void @ui2fp_v3i32_v3f32(ptr %x, ptr %y) {
-; LMULMAX8RV32-LABEL: ui2fp_v3i32_v3f32:
-; LMULMAX8RV32: # %bb.0:
-; LMULMAX8RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX8RV32-NEXT: vfcvt.f.xu.v v8, v8
-; LMULMAX8RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX8RV32-NEXT: addi a0, a1, 8
-; LMULMAX8RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX8RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX8RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX8RV32-NEXT: ret
-;
-; LMULMAX8RV64-LABEL: ui2fp_v3i32_v3f32:
-; LMULMAX8RV64: # %bb.0:
-; LMULMAX8RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX8RV64-NEXT: vfcvt.f.xu.v v8, v8
-; LMULMAX8RV64-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX8RV64-NEXT: addi a0, a1, 8
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX8RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX8RV64-NEXT: vse64.v v8, (a1)
-; LMULMAX8RV64-NEXT: ret
-;
-; LMULMAX1RV32-LABEL: ui2fp_v3i32_v3f32:
-; LMULMAX1RV32: # %bb.0:
-; LMULMAX1RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX1RV32-NEXT: vfcvt.f.xu.v v8, v8
-; LMULMAX1RV32-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1RV32-NEXT: addi a0, a1, 8
-; LMULMAX1RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV32-NEXT: vse32.v v9, (a0)
-; LMULMAX1RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX1RV32-NEXT: vse32.v v8, (a1)
-; LMULMAX1RV32-NEXT: ret
-;
-; LMULMAX1RV64-LABEL: ui2fp_v3i32_v3f32:
-; LMULMAX1RV64: # %bb.0:
-; LMULMAX1RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX1RV64-NEXT: vfcvt.f.xu.v v8, v8
-; LMULMAX1RV64-NEXT: vslidedown.vi v9, v8, 2
-; LMULMAX1RV64-NEXT: addi a0, a1, 8
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1RV64-NEXT: vse32.v v9, (a0)
-; LMULMAX1RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; LMULMAX1RV64-NEXT: vse64.v v8, (a1)
-; LMULMAX1RV64-NEXT: ret
+; CHECK-LABEL: ui2fp_v3i32_v3f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vfcvt.f.xu.v v8, v8
+; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma
+; CHECK-NEXT: vse32.v v8, (a1)
+; CHECK-NEXT: ret
%a = load <3 x i32>, ptr %x
%d = uitofp <3 x i32> %a to <3 x float>
store <3 x float> %d, ptr %y
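
One detail visible in the v3i32/v3f32 conversion checks above: only the memory accesses use the exact element count (VL=3), while the conversion itself still runs at the widened width of 4. A hedged IR sketch of that shape, again using the VP intrinsics purely as an analogue (the function name, the all-true mask, and the EVL of 3 are assumptions for illustration):

declare <4 x i32> @llvm.vp.load.v4i32.p0(ptr, <4 x i1>, i32)
declare void @llvm.vp.store.v4f32.p0(<4 x float>, ptr, <4 x i1>, i32)

define void @si2fp_v3i32_v3f32_sketch(ptr %x, ptr %y) {
  ; Read exactly 3 lanes of the widened <4 x i32>; lane 3 is poison.
  %a = call <4 x i32> @llvm.vp.load.v4i32.p0(ptr %x, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 3)
  ; The conversion runs on all 4 widened lanes, matching the VL=4 vfcvt in the checks.
  %d = sitofp <4 x i32> %a to <4 x float>
  ; Store back exactly the 3 lanes that make up the original <3 x float>.
  call void @llvm.vp.store.v4f32.p0(<4 x float> %d, ptr %y, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, i32 3)
  ret void
}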
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 43dfd63532f68..19a0f7a4d79a0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -39,30 +39,24 @@ define void @insertelt_v4i64(ptr %x, i64 %y) {
define void @insertelt_v3i64(ptr %x, i64 %y) {
; RV32-LABEL: insertelt_v3i64:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 3, e64, m2, ta, ma
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: lw a3, 16(a0)
-; RV32-NEXT: addi a4, a0, 20
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vlse32.v v10, (a4), zero
-; RV32-NEXT: vsetvli zero, zero, e32, m1, tu, ma
-; RV32-NEXT: vmv.s.x v10, a3
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; RV32-NEXT: vslideup.vi v8, v10, 2
; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a1
; RV32-NEXT: vslide1down.vx v10, v10, a2
; RV32-NEXT: vsetivli zero, 3, e64, m2, tu, ma
; RV32-NEXT: vslideup.vi v8, v10, 2
-; RV32-NEXT: sw a1, 16(a0)
-; RV32-NEXT: sw a2, 20(a0)
-; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; RV32-NEXT: vse64.v v8, (a0)
; RV32-NEXT: ret
;
; RV64-LABEL: insertelt_v3i64:
; RV64: # %bb.0:
-; RV64-NEXT: sd a1, 16(a0)
+; RV64-NEXT: vsetivli zero, 3, e64, m2, ta, ma
+; RV64-NEXT: vle64.v v8, (a0)
+; RV64-NEXT: vmv.s.x v10, a1
+; RV64-NEXT: vsetvli zero, zero, e64, m2, tu, ma
+; RV64-NEXT: vslideup.vi v8, v10, 2
+; RV64-NEXT: vse64.v v8, (a0)
; RV64-NEXT: ret
%a = load <3 x i64>, ptr %x, align 8
%b = insertelement <3 x i64> %a, i64 %y, i32 2
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
index e51950d1f8290..a536c121898de 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll
@@ -446,30 +446,22 @@ define void @buildvec_seq2_v16i8_v2i64(ptr %x) {
}
define void @buildvec_seq_v9i8(ptr %x) {
-; RV32-LABEL: buildvec_seq_v9i8:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 3
-; RV32-NEXT: sb a1, 8(a0)
-; RV32-NEXT: li a1, 73
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: vmv.v.i v9, 2
-; RV32-NEXT: li a1, 36
-; RV32-NEXT: vmv.s.x v8, a1
-; RV32-NEXT: vmerge.vim v9, v9, 1, v0
-; RV32-NEXT: vmv1r.v v0, v8
-; RV32-NEXT: vmerge.vim v8, v9, 3, v0
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: buildvec_seq_v9i8:
-; RV64: # %bb.0:
-; RV64-NEXT: lui a1, %hi(.LCPI26_0)
-; RV64-NEXT: ld a1, %lo(.LCPI26_0)(a1)
-; RV64-NEXT: li a2, 3
-; RV64-NEXT: sb a2, 8(a0)
-; RV64-NEXT: sd a1, 0(a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: buildvec_seq_v9i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 73
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 3
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: li a1, 146
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 2, v0
+; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
store <9 x i8> <i8 1, i8 2, i8 3, i8 1, i8 2, i8 3, i8 1, i8 2, i8 3>, ptr %x
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
index baf5f9c98826a..05c99f04b7d14 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-splat.ll
@@ -549,68 +549,13 @@ define void @splat_zero_v2i32(ptr %p) {
; Not a power of two and requires more than two scalar stores.
define void @splat_zero_v7i16(ptr %p) {
-; LMULMAX8-RV32-LABEL: splat_zero_v7i16:
-; LMULMAX8-RV32: # %bb.0:
-; LMULMAX8-RV32-NEXT: sh zero, 12(a0)
-; LMULMAX8-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX8-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX8-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX8-RV32-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX8-RV32-NEXT: addi a0, a0, 8
-; LMULMAX8-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX8-RV32-NEXT: ret
-;
-; LMULMAX2-RV32-LABEL: splat_zero_v7i16:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: sh zero, 12(a0)
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX2-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX2-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX2-RV32-NEXT: addi a0, a0, 8
-; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX1-RV32-LABEL: splat_zero_v7i16:
-; LMULMAX1-RV32: # %bb.0:
-; LMULMAX1-RV32-NEXT: sh zero, 12(a0)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vse16.v v8, (a0)
-; LMULMAX1-RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vmv.v.i v8, 0
-; LMULMAX1-RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; LMULMAX1-RV32-NEXT: vslidedown.vi v8, v8, 2
-; LMULMAX1-RV32-NEXT: addi a0, a0, 8
-; LMULMAX1-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX1-RV32-NEXT: ret
-;
-; LMULMAX8-RV64-LABEL: splat_zero_v7i16:
-; LMULMAX8-RV64: # %bb.0:
-; LMULMAX8-RV64-NEXT: sh zero, 12(a0)
-; LMULMAX8-RV64-NEXT: sw zero, 8(a0)
-; LMULMAX8-RV64-NEXT: sd zero, 0(a0)
-; LMULMAX8-RV64-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: splat_zero_v7i16:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: sh zero, 12(a0)
-; LMULMAX2-RV64-NEXT: sw zero, 8(a0)
-; LMULMAX2-RV64-NEXT: sd zero, 0(a0)
-; LMULMAX2-RV64-NEXT: ret
-;
-; LMULMAX1-RV64-LABEL: splat_zero_v7i16:
-; LMULMAX1-RV64: # %bb.0:
-; LMULMAX1-RV64-NEXT: sh zero, 12(a0)
-; LMULMAX1-RV64-NEXT: sw zero, 8(a0)
-; LMULMAX1-RV64-NEXT: sd zero, 0(a0)
-; LMULMAX1-RV64-NEXT: ret
+; CHECK-LABEL: splat_zero_v7i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetivli zero, 7, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
store <7 x i16> zeroinitializer, ptr %p
ret void
}
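
For splat_zero_v7i16 the store-only case shows the same effect with no load involved: the zero splat is materialized at the widened width of 8 and stored with VL=7, so the old tail handling with scalar sh/sw/sd stores goes away. A minimal IR-level sketch, with the VP store intrinsic used only as an analogue (the function name, all-true mask, and EVL of 7 are assumptions, not from this patch):

declare void @llvm.vp.store.v8i16.p0(<8 x i16>, ptr, <8 x i1>, i32)

define void @splat_zero_v7i16_sketch(ptr %p) {
  ; Zero splat at the widened width; only the first 7 lanes reach memory.
  call void @llvm.vp.store.v8i16.p0(<8 x i16> zeroinitializer, ptr %p, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 7)
  ret void
}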
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
index 9b40d8f963260..c660e7f8ff65e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll
@@ -39,33 +39,16 @@ define void @add_v8i16(ptr %x, ptr %y) {
}
define void @add_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: add_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vadd.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: add_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vadd.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: add_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vadd.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = add <6 x i16> %a, %b
@@ -138,33 +121,16 @@ define void @sub_v8i16(ptr %x, ptr %y) {
}
define void @sub_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: sub_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vsub.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: sub_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vsub.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: sub_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsub.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = sub <6 x i16> %a, %b
@@ -237,33 +203,16 @@ define void @mul_v8i16(ptr %x, ptr %y) {
}
define void @mul_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: mul_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vmul.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mul_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vmul.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mul_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmul.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = mul <6 x i16> %a, %b
@@ -336,33 +285,16 @@ define void @and_v8i16(ptr %x, ptr %y) {
}
define void @and_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: and_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vand.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: and_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vand.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: and_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vand.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = and <6 x i16> %a, %b
@@ -435,33 +367,16 @@ define void @or_v8i16(ptr %x, ptr %y) {
}
define void @or_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: or_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vor.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: or_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vor.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: or_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vor.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = or <6 x i16> %a, %b
@@ -534,33 +449,16 @@ define void @xor_v8i16(ptr %x, ptr %y) {
}
define void @xor_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: xor_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vxor.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: xor_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vxor.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: xor_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vxor.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = xor <6 x i16> %a, %b
@@ -633,33 +531,16 @@ define void @lshr_v8i16(ptr %x, ptr %y) {
}
define void @lshr_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: lshr_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vsrl.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: lshr_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vsrl.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: lshr_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsrl.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = lshr <6 x i16> %a, %b
@@ -732,33 +613,16 @@ define void @ashr_v8i16(ptr %x, ptr %y) {
}
define void @ashr_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: ashr_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vsra.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: ashr_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vsra.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: ashr_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsra.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = ashr <6 x i16> %a, %b
@@ -831,33 +695,16 @@ define void @shl_v8i16(ptr %x, ptr %y) {
}
define void @shl_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: shl_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vsll.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: shl_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vsll.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: shl_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsll.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = shl <6 x i16> %a, %b
@@ -930,48 +777,23 @@ define void @sdiv_v8i16(ptr %x, ptr %y) {
}
define void @sdiv_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: sdiv_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: vle16.v v9, (a0)
-; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: vslidedown.vi v11, v9, 4
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vdiv.vv v10, v11, v10
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v11, v10, 4
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vdiv.vv v8, v9, v8
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v11, 2
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: sdiv_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a1)
-; RV64-NEXT: vle16.v v9, (a0)
-; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 4
-; RV64-NEXT: vslidedown.vi v11, v9, 4
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vdiv.vv v10, v11, v10
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vdiv.vv v8, v9, v8
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vslideup.vi v8, v10, 4
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: sdiv_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
+; CHECK-NEXT: vslidedown.vi v11, v9, 4
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vdiv.vv v10, v11, v10
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vdiv.vv v8, v9, v8
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = sdiv <6 x i16> %a, %b
@@ -1044,48 +866,23 @@ define void @srem_v8i16(ptr %x, ptr %y) {
}
define void @srem_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: srem_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: vle16.v v9, (a0)
-; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: vslidedown.vi v11, v9, 4
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vrem.vv v10, v11, v10
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v11, v10, 4
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vrem.vv v8, v9, v8
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v11, 2
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: srem_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a1)
-; RV64-NEXT: vle16.v v9, (a0)
-; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 4
-; RV64-NEXT: vslidedown.vi v11, v9, 4
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vrem.vv v10, v11, v10
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vrem.vv v8, v9, v8
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vslideup.vi v8, v10, 4
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: srem_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
+; CHECK-NEXT: vslidedown.vi v11, v9, 4
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vrem.vv v10, v11, v10
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vrem.vv v8, v9, v8
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = srem <6 x i16> %a, %b
@@ -1158,48 +955,23 @@ define void @udiv_v8i16(ptr %x, ptr %y) {
}
define void @udiv_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: udiv_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: vle16.v v9, (a0)
-; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: vslidedown.vi v11, v9, 4
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vdivu.vv v10, v11, v10
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v11, v10, 4
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vdivu.vv v8, v9, v8
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v11, 2
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: udiv_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a1)
-; RV64-NEXT: vle16.v v9, (a0)
-; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 4
-; RV64-NEXT: vslidedown.vi v11, v9, 4
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vdivu.vv v10, v11, v10
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vdivu.vv v8, v9, v8
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vslideup.vi v8, v10, 4
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: udiv_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
+; CHECK-NEXT: vslidedown.vi v11, v9, 4
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vdivu.vv v10, v11, v10
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vdivu.vv v8, v9, v8
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = udiv <6 x i16> %a, %b
@@ -1272,48 +1044,23 @@ define void @urem_v8i16(ptr %x, ptr %y) {
}
define void @urem_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: urem_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a1)
-; RV32-NEXT: vle16.v v9, (a0)
-; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: vslidedown.vi v11, v9, 4
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vremu.vv v10, v11, v10
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v11, v10, 4
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vremu.vv v8, v9, v8
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v11, 2
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: urem_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a1)
-; RV64-NEXT: vle16.v v9, (a0)
-; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 4
-; RV64-NEXT: vslidedown.vi v11, v9, 4
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vremu.vv v10, v11, v10
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vremu.vv v8, v9, v8
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vslideup.vi v8, v10, 4
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: urem_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a1)
+; CHECK-NEXT: vle16.v v9, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
+; CHECK-NEXT: vslidedown.vi v11, v9, 4
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vremu.vv v10, v11, v10
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vremu.vv v8, v9, v8
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%c = urem <6 x i16> %a, %b
@@ -1493,58 +1240,30 @@ define void @mulhu_v8i16(ptr %x) {
}
define void @mulhu_v6i16(ptr %x) {
-; RV32-LABEL: mulhu_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vadd.vi v10, v10, 12
-; RV32-NEXT: vdivu.vv v9, v9, v10
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v10, v9, 4
-; RV32-NEXT: lui a1, %hi(.LCPI67_0)
-; RV32-NEXT: addi a1, a1, %lo(.LCPI67_0)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vdivu.vv v8, v8, v9
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 2
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhu_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: lui a1, %hi(.LCPI67_0)
-; RV64-NEXT: addi a1, a1, %lo(.LCPI67_0)
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vdivu.vv v9, v8, v9
-; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 4
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: vadd.vi v10, v10, 12
-; RV64-NEXT: vdivu.vv v8, v8, v10
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vslideup.vi v9, v8, 4
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v9, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v9, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
- %a = load <6 x i16>, ptr %x
- %b = udiv <6 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13>
- store <6 x i16> %b, ptr %x
- ret void
+; CHECK-LABEL: mulhu_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: lui a1, %hi(.LCPI67_0)
+; CHECK-NEXT: addi a1, a1, %lo(.LCPI67_0)
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vdivu.vv v9, v8, v9
+; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v8, v8, 4
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: vadd.vi v10, v10, 12
+; CHECK-NEXT: vdivu.vv v8, v8, v10
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v9, v8, 4
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v9, (a0)
+; CHECK-NEXT: ret
+ %a = load <6 x i16>, ptr %x
+ %b = udiv <6 x i16> %a, <i16 7, i16 9, i16 10, i16 11, i16 12, i16 13>
+ store <6 x i16> %b, ptr %x
+ ret void
}
define void @mulhu_v4i32(ptr %x) {
@@ -1716,62 +1435,30 @@ define void @mulhs_v8i16(ptr %x) {
}
define void @mulhs_v6i16(ptr %x) {
-; RV32-LABEL: mulhs_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vmv.v.i v9, 7
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: li a1, -14
-; RV32-NEXT: vmadd.vx v10, a1, v9
-; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 4
-; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV32-NEXT: vdiv.vv v9, v9, v10
-; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma
-; RV32-NEXT: vslideup.vi v10, v9, 4
-; RV32-NEXT: li a1, 6
-; RV32-NEXT: vmv.s.x v0, a1
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vmv.v.i v9, -7
-; RV32-NEXT: vmerge.vim v9, v9, 7, v0
-; RV32-NEXT: vdiv.vv v8, v8, v9
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v8, v10, 2
-; RV32-NEXT: addi a0, a0, 8
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: mulhs_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vmv.v.i v9, 7
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: li a1, -14
-; RV64-NEXT: vmadd.vx v10, a1, v9
-; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 4
-; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vdiv.vv v9, v9, v10
-; RV64-NEXT: li a1, 6
-; RV64-NEXT: vmv.s.x v0, a1
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vmv.v.i v10, -7
-; RV64-NEXT: vmerge.vim v10, v10, 7, v0
-; RV64-NEXT: vdiv.vv v8, v8, v10
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vslideup.vi v8, v9, 4
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: mulhs_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.v.i v9, 7
+; CHECK-NEXT: vid.v v10
+; CHECK-NEXT: li a1, -14
+; CHECK-NEXT: vmadd.vx v10, a1, v9
+; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma
+; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vdiv.vv v9, v9, v10
+; CHECK-NEXT: li a1, 6
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, -7
+; CHECK-NEXT: vmerge.vim v10, v10, 7, v0
+; CHECK-NEXT: vdiv.vv v8, v8, v10
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vslideup.vi v8, v9, 4
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = sdiv <6 x i16> %a, <i16 -7, i16 7, i16 7, i16 -7, i16 7, i16 -7>
store <6 x i16> %b, ptr %x
@@ -1917,33 +1604,16 @@ define void @smin_v8i16(ptr %x, ptr %y) {
}
define void @smin_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: smin_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vmin.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: smin_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vmin.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: smin_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmin.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%cc = icmp slt <6 x i16> %a, %b
@@ -2021,31 +1691,15 @@ define void @smin_vx_v8i16(ptr %x, i16 %y) {
declare <8 x i16> @llvm.smin.v8i16(<8 x i16>, <8 x i16>)
define void @smin_vx_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: smin_vx_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vmin.vx v8, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: smin_vx_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vmin.vx v8, v8, a1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: smin_vx_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmin.vx v8, v8, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = insertelement <6 x i16> poison, i16 %y, i32 0
%c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2105,31 +1759,15 @@ define void @smin_xv_v8i16(ptr %x, i16 %y) {
}
define void @smin_xv_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: smin_xv_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vmin.vx v8, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: smin_xv_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vmin.vx v8, v8, a1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: smin_xv_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmin.vx v8, v8, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = insertelement <6 x i16> poison, i16 %y, i32 0
%c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2189,33 +1827,16 @@ define void @smax_v8i16(ptr %x, ptr %y) {
}
define void @smax_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: smax_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vmax.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: smax_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vmax.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: smax_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmax.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%cc = icmp sgt <6 x i16> %a, %b
@@ -2293,31 +1914,15 @@ define void @smax_vx_v8i16(ptr %x, i16 %y) {
declare <8 x i16> @llvm.smax.v8i16(<8 x i16>, <8 x i16>)
define void @smax_vx_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: smax_vx_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vmax.vx v8, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: smax_vx_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vmax.vx v8, v8, a1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: smax_vx_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = insertelement <6 x i16> poison, i16 %y, i32 0
%c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2377,31 +1982,15 @@ define void @smax_xv_v8i16(ptr %x, i16 %y) {
}
define void @smax_xv_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: smax_xv_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vmax.vx v8, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: smax_xv_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vmax.vx v8, v8, a1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: smax_xv_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmax.vx v8, v8, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = insertelement <6 x i16> poison, i16 %y, i32 0
%c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2461,33 +2050,16 @@ define void @umin_v8i16(ptr %x, ptr %y) {
}
define void @umin_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: umin_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vminu.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: umin_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vminu.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: umin_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vminu.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%cc = icmp ult <6 x i16> %a, %b
@@ -2565,31 +2137,15 @@ define void @umin_vx_v8i16(ptr %x, i16 %y) {
declare <8 x i16> @llvm.umin.v8i16(<8 x i16>, <8 x i16>)
define void @umin_vx_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: umin_vx_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vminu.vx v8, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: umin_vx_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vminu.vx v8, v8, a1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: umin_vx_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vminu.vx v8, v8, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = insertelement <6 x i16> poison, i16 %y, i32 0
%c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2649,31 +2205,15 @@ define void @umin_xv_v8i16(ptr %x, i16 %y) {
}
define void @umin_xv_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: umin_xv_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vminu.vx v8, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: umin_xv_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vminu.vx v8, v8, a1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: umin_xv_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vminu.vx v8, v8, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = insertelement <6 x i16> poison, i16 %y, i32 0
%c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2733,33 +2273,16 @@ define void @umax_v8i16(ptr %x, ptr %y) {
}
define void @umax_v6i16(ptr %x, ptr %y) {
-; RV32-LABEL: umax_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vle16.v v9, (a1)
-; RV32-NEXT: vmaxu.vv v8, v8, v9
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: umax_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vle16.v v9, (a1)
-; RV64-NEXT: vmaxu.vv v8, v8, v9
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: umax_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vle16.v v9, (a1)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmaxu.vv v8, v8, v9
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = load <6 x i16>, ptr %y
%cc = icmp ugt <6 x i16> %a, %b
@@ -2837,31 +2360,15 @@ define void @umax_vx_v8i16(ptr %x, i16 %y) {
declare <8 x i16> @llvm.umax.v8i16(<8 x i16>, <8 x i16>)
define void @umax_vx_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: umax_vx_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vmaxu.vx v8, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: umax_vx_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vmaxu.vx v8, v8, a1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: umax_vx_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = insertelement <6 x i16> poison, i16 %y, i32 0
%c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -2921,31 +2428,15 @@ define void @umax_xv_v8i16(ptr %x, i16 %y) {
}
define void @umax_xv_v6i16(ptr %x, i16 %y) {
-; RV32-LABEL: umax_xv_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: vmaxu.vx v8, v8, a1
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: umax_xv_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: vmaxu.vx v8, v8, a1
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: umax_xv_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vmaxu.vx v8, v8, a1
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
%a = load <6 x i16>, ptr %x
%b = insertelement <6 x i16> poison, i16 %y, i32 0
%c = shufflevector <6 x i16> %b, <6 x i16> poison, <6 x i32> zeroinitializer
@@ -3110,34 +2601,16 @@ define void @add_v8i32(ptr %x, ptr %y) {
}
define void @add_v6i32(ptr %x, ptr %y) {
-; LMULMAX2-RV32-LABEL: add_v6i32:
-; LMULMAX2-RV32: # %bb.0:
-; LMULMAX2-RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: vle32.v v10, (a1)
-; LMULMAX2-RV32-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; LMULMAX2-RV32-NEXT: vslidedown.vi v10, v8, 4
-; LMULMAX2-RV32-NEXT: addi a1, a0, 16
-; LMULMAX2-RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; LMULMAX2-RV32-NEXT: vse32.v v10, (a1)
-; LMULMAX2-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV32-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV32-NEXT: ret
-;
-; LMULMAX2-RV64-LABEL: add_v6i32:
-; LMULMAX2-RV64: # %bb.0:
-; LMULMAX2-RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vle32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: vle32.v v10, (a1)
-; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10
-; LMULMAX2-RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; LMULMAX2-RV64-NEXT: vslidedown.vi v10, v8, 2
-; LMULMAX2-RV64-NEXT: addi a1, a0, 16
-; LMULMAX2-RV64-NEXT: vse64.v v10, (a1)
-; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX2-RV64-NEXT: vse32.v v8, (a0)
-; LMULMAX2-RV64-NEXT: ret
+; LMULMAX2-LABEL: add_v6i32:
+; LMULMAX2: # %bb.0:
+; LMULMAX2-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; LMULMAX2-NEXT: vle32.v v8, (a0)
+; LMULMAX2-NEXT: vle32.v v10, (a1)
+; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; LMULMAX2-NEXT: vadd.vv v8, v8, v10
+; LMULMAX2-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; LMULMAX2-NEXT: vse32.v v8, (a0)
+; LMULMAX2-NEXT: ret
;
; LMULMAX1-RV32-LABEL: add_v6i32:
; LMULMAX1-RV32: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
index d1a4e47407a97..f0539e7566b67 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll
@@ -10,7 +10,7 @@
define {<3 x i32>, <3 x i32>} @load_factor2_v3(ptr %ptr) {
; CHECK-LABEL: load_factor2_v3:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vid.v v8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
index 94f189e857ed9..c0a35edc237c3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-load.ll
@@ -3,101 +3,31 @@
; RUN: llc -mtriple=riscv64 -mattr=+v,+zfh,+experimental-zvfh -verify-machineinstrs < %s | FileCheck -check-prefixes=CHECK,RV64 %s
define <5 x i8> @load_v5i8(ptr %p) {
-; RV32-LABEL: load_v5i8:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: load_v5i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: load_v5i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: ret
%x = load <5 x i8>, ptr %p
ret <5 x i8> %x
}
define <5 x i8> @load_v5i8_align1(ptr %p) {
-; RV32-LABEL: load_v5i8_align1:
-; RV32: # %bb.0:
-; RV32-NEXT: lbu a1, 1(a0)
-; RV32-NEXT: lbu a2, 0(a0)
-; RV32-NEXT: lbu a3, 2(a0)
-; RV32-NEXT: lbu a4, 3(a0)
-; RV32-NEXT: slli a1, a1, 8
-; RV32-NEXT: or a1, a1, a2
-; RV32-NEXT: slli a3, a3, 16
-; RV32-NEXT: slli a4, a4, 24
-; RV32-NEXT: or a3, a4, a3
-; RV32-NEXT: or a1, a3, a1
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.s.x v8, a1
-; RV32-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 3
-; RV32-NEXT: vmv.x.s a1, v9
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: vmv.x.s a2, v9
-; RV32-NEXT: vslidedown.vi v9, v8, 1
-; RV32-NEXT: vmv.x.s a3, v9
-; RV32-NEXT: vmv.x.s a4, v8
-; RV32-NEXT: lb a0, 4(a0)
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vslide1down.vx v8, v8, a4
-; RV32-NEXT: vslide1down.vx v8, v8, a3
-; RV32-NEXT: vslide1down.vx v8, v8, a2
-; RV32-NEXT: vslide1down.vx v8, v8, a1
-; RV32-NEXT: vslide1down.vx v8, v8, a0
-; RV32-NEXT: vslidedown.vi v8, v8, 3
-; RV32-NEXT: ret
-;
-; RV64-LABEL: load_v5i8_align1:
-; RV64: # %bb.0:
-; RV64-NEXT: lbu a1, 1(a0)
-; RV64-NEXT: lbu a2, 0(a0)
-; RV64-NEXT: lbu a3, 2(a0)
-; RV64-NEXT: lb a4, 3(a0)
-; RV64-NEXT: slli a1, a1, 8
-; RV64-NEXT: or a1, a1, a2
-; RV64-NEXT: slli a3, a3, 16
-; RV64-NEXT: slli a4, a4, 24
-; RV64-NEXT: or a3, a4, a3
-; RV64-NEXT: or a1, a3, a1
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vmv.s.x v8, a1
-; RV64-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; RV64-NEXT: vslidedown.vi v9, v8, 3
-; RV64-NEXT: vmv.x.s a1, v9
-; RV64-NEXT: vslidedown.vi v9, v8, 2
-; RV64-NEXT: vmv.x.s a2, v9
-; RV64-NEXT: vslidedown.vi v9, v8, 1
-; RV64-NEXT: vmv.x.s a3, v9
-; RV64-NEXT: vmv.x.s a4, v8
-; RV64-NEXT: lb a0, 4(a0)
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV64-NEXT: vslide1down.vx v8, v8, a4
-; RV64-NEXT: vslide1down.vx v8, v8, a3
-; RV64-NEXT: vslide1down.vx v8, v8, a2
-; RV64-NEXT: vslide1down.vx v8, v8, a1
-; RV64-NEXT: vslide1down.vx v8, v8, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 3
-; RV64-NEXT: ret
+; CHECK-LABEL: load_v5i8_align1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: ret
%x = load <5 x i8>, ptr %p, align 1
ret <5 x i8> %x
}
define <6 x i8> @load_v6i8(ptr %p) {
-; RV32-LABEL: load_v6i8:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vle8.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: load_v6i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: load_v6i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-NEXT: vle8.v v8, (a0)
+; CHECK-NEXT: ret
%x = load <6 x i8>, ptr %p
ret <6 x i8> %x
}
@@ -105,7 +35,7 @@ define <6 x i8> @load_v6i8(ptr %p) {
define <12 x i8> @load_v12i8(ptr %p) {
; CHECK-LABEL: load_v12i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 12, e8, m1, ta, ma
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: ret
%x = load <12 x i8>, ptr %p
@@ -115,7 +45,7 @@ define <12 x i8> @load_v12i8(ptr %p) {
define <6 x i16> @load_v6i16(ptr %p) {
; CHECK-LABEL: load_v6i16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
%x = load <6 x i16>, ptr %p
@@ -125,7 +55,7 @@ define <6 x i16> @load_v6i16(ptr %p) {
define <6 x half> @load_v6f16(ptr %p) {
; CHECK-LABEL: load_v6f16:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: ret
%x = load <6 x half>, ptr %p
@@ -135,7 +65,7 @@ define <6 x half> @load_v6f16(ptr %p) {
define <6 x float> @load_v6f32(ptr %p) {
; CHECK-LABEL: load_v6f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: ret
%x = load <6 x float>, ptr %p
@@ -145,7 +75,7 @@ define <6 x float> @load_v6f32(ptr %p) {
define <6 x double> @load_v6f64(ptr %p) {
; CHECK-LABEL: load_v6f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 6, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: ret
%x = load <6 x double>, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
index 61a358ac471a3..e6b3c25b5d935 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll
@@ -5,12 +5,8 @@
define void @store_v5i8(ptr %p, <5 x i8> %v) {
; CHECK-LABEL: store_v5i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 4
-; CHECK-NEXT: addi a1, a0, 4
-; CHECK-NEXT: vse8.v v9, (a1)
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <5 x i8> %v, ptr %p
ret void
@@ -19,19 +15,8 @@ define void @store_v5i8(ptr %p, <5 x i8> %v) {
define void @store_v5i8_align1(ptr %p, <5 x i8> %v) {
; CHECK-LABEL: store_v5i8_align1:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e8, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v9, v8, 4
-; CHECK-NEXT: addi a1, a0, 4
-; CHECK-NEXT: vse8.v v9, (a1)
-; CHECK-NEXT: vsetivli zero, 0, e32, mf2, ta, ma
-; CHECK-NEXT: vmv.x.s a1, v8
-; CHECK-NEXT: sb a1, 0(a0)
-; CHECK-NEXT: srli a2, a1, 24
-; CHECK-NEXT: sb a2, 3(a0)
-; CHECK-NEXT: srli a2, a1, 16
-; CHECK-NEXT: sb a2, 2(a0)
-; CHECK-NEXT: srli a1, a1, 8
-; CHECK-NEXT: sb a1, 1(a0)
+; CHECK-NEXT: vsetivli zero, 5, e8, mf2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <5 x i8> %v, ptr %p, align 1
ret void
@@ -41,110 +26,49 @@ define void @store_v5i8_align1(ptr %p, <5 x i8> %v) {
define void @store_v6i8(ptr %p, <6 x i8> %v) {
; CHECK-LABEL: store_v6i8:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
-; CHECK-NEXT: vse32.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 1, e16, mf2, ta, ma
-; CHECK-NEXT: vslidedown.vi v8, v8, 2
-; CHECK-NEXT: addi a0, a0, 4
-; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: vsetivli zero, 6, e8, mf2, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
; CHECK-NEXT: ret
store <6 x i8> %v, ptr %p
ret void
}
define void @store_v12i8(ptr %p, <12 x i8> %v) {
-; RV32-LABEL: store_v12i8:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; RV32-NEXT: vse8.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: store_v12i8:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: store_v12i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 12, e8, m1, ta, ma
+; CHECK-NEXT: vse8.v v8, (a0)
+; CHECK-NEXT: ret
store <12 x i8> %v, ptr %p
ret void
}
define void @store_v6i16(ptr %p, <6 x i16> %v) {
-; RV32-LABEL: store_v6i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: store_v6i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: store_v6i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
store <6 x i16> %v, ptr %p
ret void
}
define void @store_v6f16(ptr %p, <6 x half> %v) {
-; RV32-LABEL: store_v6f16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV32-NEXT: vslidedown.vi v9, v8, 2
-; RV32-NEXT: addi a1, a0, 8
-; RV32-NEXT: vse32.v v9, (a1)
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: store_v6f16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma
-; RV64-NEXT: vse64.v v8, (a0)
-; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
-; RV64-NEXT: vslidedown.vi v8, v8, 2
-; RV64-NEXT: addi a0, a0, 8
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: store_v6f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma
+; CHECK-NEXT: vse16.v v8, (a0)
+; CHECK-NEXT: ret
store <6 x half> %v, ptr %p
ret void
}
define void @store_v6f32(ptr %p, <6 x float> %v) {
-; RV32-LABEL: store_v6f32:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: addi a1, a0, 16
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v10, (a1)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT: vse32.v v8, (a0)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: store_v6f32:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: addi a1, a0, 16
-; RV64-NEXT: vse64.v v10, (a1)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; RV64-NEXT: vse32.v v8, (a0)
-; RV64-NEXT: ret
+; CHECK-LABEL: store_v6f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma
+; CHECK-NEXT: vse32.v v8, (a0)
+; CHECK-NEXT: ret
store <6 x float> %v, ptr %p
ret void
}
@@ -152,12 +76,7 @@ define void @store_v6f32(ptr %p, <6 x float> %v) {
define void @store_v6f64(ptr %p, <6 x double> %v) {
; CHECK-LABEL: store_v6f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e64, m4, ta, ma
-; CHECK-NEXT: vslidedown.vi v12, v8, 4
-; CHECK-NEXT: addi a1, a0, 32
-; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vse64.v v12, (a1)
-; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vsetivli zero, 6, e64, m4, ta, ma
; CHECK-NEXT: vse64.v v8, (a0)
; CHECK-NEXT: ret
store <6 x double> %v, ptr %p
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
index 3aa8ce8f892ca..0ac09ea7f80a3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll
@@ -22,62 +22,20 @@ define void @widen_2xv4i16(ptr %x, ptr %z) {
}
define void @widen_3xv4i16(ptr %x, ptr %z) {
-; RV32-LABEL: widen_3xv4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vle16.v v8, (a0)
-; RV32-NEXT: addi a2, a0, 8
-; RV32-NEXT: vle16.v v10, (a2)
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vle16.v v12, (a0)
-; RV32-NEXT: vsetivli zero, 8, e16, m2, tu, ma
-; RV32-NEXT: vslideup.vi v8, v10, 4
-; RV32-NEXT: addi a0, a1, 16
-; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vse16.v v12, (a0)
-; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV32-NEXT: vse16.v v8, (a1)
-; RV32-NEXT: ret
-;
-; RV64-LABEL: widen_3xv4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV64-NEXT: vle16.v v8, (a0)
-; RV64-NEXT: addi a2, a0, 8
-; RV64-NEXT: vle16.v v10, (a2)
-; RV64-NEXT: addi a0, a0, 16
-; RV64-NEXT: vle16.v v12, (a0)
-; RV64-NEXT: vsetivli zero, 8, e16, m2, tu, ma
-; RV64-NEXT: vslideup.vi v8, v10, 4
-; RV64-NEXT: vsetivli zero, 12, e16, m2, tu, ma
-; RV64-NEXT: vslideup.vi v8, v12, 8
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: addi a0, a1, 16
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; RV64-NEXT: vse16.v v8, (a1)
-; RV64-NEXT: ret
-;
-; ZVE64F-LABEL: widen_3xv4i16:
-; ZVE64F: # %bb.0:
-; ZVE64F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; ZVE64F-NEXT: vle16.v v8, (a0)
-; ZVE64F-NEXT: addi a2, a0, 8
-; ZVE64F-NEXT: vle16.v v10, (a2)
-; ZVE64F-NEXT: addi a0, a0, 16
-; ZVE64F-NEXT: vle16.v v12, (a0)
-; ZVE64F-NEXT: vsetivli zero, 8, e16, m2, tu, ma
-; ZVE64F-NEXT: vslideup.vi v8, v10, 4
-; ZVE64F-NEXT: vsetivli zero, 12, e16, m2, tu, ma
-; ZVE64F-NEXT: vslideup.vi v8, v12, 8
-; ZVE64F-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; ZVE64F-NEXT: vslidedown.vi v10, v8, 2
-; ZVE64F-NEXT: addi a0, a1, 16
-; ZVE64F-NEXT: vse64.v v10, (a0)
-; ZVE64F-NEXT: vsetivli zero, 8, e16, m1, ta, ma
-; ZVE64F-NEXT: vse16.v v8, (a1)
-; ZVE64F-NEXT: ret
+; CHECK-LABEL: widen_3xv4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: addi a2, a0, 8
+; CHECK-NEXT: vle16.v v10, (a2)
+; CHECK-NEXT: addi a0, a0, 16
+; CHECK-NEXT: vle16.v v12, (a0)
+; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v10, 4
+; CHECK-NEXT: vsetivli zero, 12, e16, m2, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v12, 8
+; CHECK-NEXT: vse16.v v8, (a1)
+; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %x
%b.gep = getelementptr i8, ptr %x, i64 8
%b = load <4 x i16>, ptr %b.gep
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
index 0519305e6f2d2..92db0a9e545a9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll
@@ -5,70 +5,65 @@
define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vv_v6i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: lbu a2, 0(a2)
-; RV32-NEXT: vle32.v v8, (a1)
-; RV32-NEXT: andi a1, a2, 1
-; RV32-NEXT: vslide1down.vx v10, v8, a1
-; RV32-NEXT: slli a1, a2, 30
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 29
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 28
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 27
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vle32.v v10, (a1)
+; RV32-NEXT: andi a0, a2, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v8, a0
+; RV32-NEXT: slli a0, a2, 30
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: slli a0, a2, 29
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: slli a0, a2, 28
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: slli a0, a2, 27
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v12, v12, a0
; RV32-NEXT: srli a2, a2, 5
-; RV32-NEXT: vslide1down.vx v10, v10, a2
-; RV32-NEXT: vslidedown.vi v10, v10, 2
-; RV32-NEXT: vand.vi v10, v10, 1
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vle32.v v8, (a0), v0.t
-; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: addi a0, a3, 16
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a2
+; RV32-NEXT: vslidedown.vi v12, v12, 2
+; RV32-NEXT: vand.vi v12, v12, 1
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: vse32.v v8, (a3)
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vv_v6i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: lbu a2, 0(a2)
-; RV64-NEXT: vle32.v v8, (a1)
-; RV64-NEXT: andi a1, a2, 1
-; RV64-NEXT: vslide1down.vx v10, v8, a1
-; RV64-NEXT: slli a1, a2, 62
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 61
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 60
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 59
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vle32.v v10, (a1)
+; RV64-NEXT: andi a0, a2, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v8, a0
+; RV64-NEXT: slli a0, a2, 62
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: slli a0, a2, 61
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: slli a0, a2, 60
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: slli a0, a2, 59
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v12, v12, a0
; RV64-NEXT: srli a2, a2, 5
-; RV64-NEXT: vslide1down.vx v10, v10, a2
-; RV64-NEXT: vslidedown.vi v10, v10, 2
-; RV64-NEXT: vand.vi v10, v10, 1
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vle32.v v8, (a0), v0.t
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: addi a0, a3, 16
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a2
+; RV64-NEXT: vslidedown.vi v12, v12, 2
+; RV64-NEXT: vand.vi v12, v12, 1
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: vse32.v v8, (a3)
; RV64-NEXT: ret
%va = load <6 x i32>, ptr %a
@@ -82,10 +77,11 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) {
define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vx_v6i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: lbu a2, 0(a2)
; RV32-NEXT: vle32.v v8, (a1)
; RV32-NEXT: andi a1, a2, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a1
; RV32-NEXT: slli a1, a2, 30
; RV32-NEXT: srli a1, a1, 31
@@ -106,21 +102,17 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: addi a0, a3, 16
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: vse32.v v8, (a3)
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vx_v6i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: lbu a2, 0(a2)
; RV64-NEXT: vle32.v v8, (a1)
; RV64-NEXT: andi a1, a2, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslide1down.vx v10, v8, a1
; RV64-NEXT: slli a1, a2, 62
; RV64-NEXT: srli a1, a1, 63
@@ -141,11 +133,7 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vmerge.vxm v8, v8, a0, v0
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: addi a0, a3, 16
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: vse32.v v8, (a3)
; RV64-NEXT: ret
%vb = load <6 x i32>, ptr %b
@@ -160,10 +148,11 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) {
define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vi_v6i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: lbu a1, 0(a1)
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: andi a0, a1, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a0
; RV32-NEXT: slli a0, a1, 30
; RV32-NEXT: srli a0, a0, 31
@@ -184,21 +173,17 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vmerge.vim v8, v8, -1, v0
-; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: addi a0, a2, 16
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: vse32.v v8, (a2)
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vi_v6i32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: lbu a1, 0(a1)
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: andi a0, a1, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslide1down.vx v10, v8, a0
; RV64-NEXT: slli a0, a1, 62
; RV64-NEXT: srli a0, a0, 63
@@ -219,11 +204,7 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vmerge.vim v8, v8, -1, v0
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: addi a0, a2, 16
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: vse32.v v8, (a2)
; RV64-NEXT: ret
%vb = load <6 x i32>, ptr %b
@@ -239,70 +220,65 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) {
define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vv_v6f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: lbu a2, 0(a2)
-; RV32-NEXT: vle32.v v8, (a1)
-; RV32-NEXT: andi a1, a2, 1
-; RV32-NEXT: vslide1down.vx v10, v8, a1
-; RV32-NEXT: slli a1, a2, 30
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 29
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 28
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
-; RV32-NEXT: slli a1, a2, 27
-; RV32-NEXT: srli a1, a1, 31
-; RV32-NEXT: vslide1down.vx v10, v10, a1
+; RV32-NEXT: vle32.v v8, (a0)
+; RV32-NEXT: vle32.v v10, (a1)
+; RV32-NEXT: andi a0, a2, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v8, a0
+; RV32-NEXT: slli a0, a2, 30
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: slli a0, a2, 29
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: slli a0, a2, 28
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v12, v12, a0
+; RV32-NEXT: slli a0, a2, 27
+; RV32-NEXT: srli a0, a0, 31
+; RV32-NEXT: vslide1down.vx v12, v12, a0
; RV32-NEXT: srli a2, a2, 5
-; RV32-NEXT: vslide1down.vx v10, v10, a2
-; RV32-NEXT: vslidedown.vi v10, v10, 2
-; RV32-NEXT: vand.vi v10, v10, 1
-; RV32-NEXT: vmsne.vi v0, v10, 0
-; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV32-NEXT: vle32.v v8, (a0), v0.t
-; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: addi a0, a3, 16
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vslide1down.vx v12, v12, a2
+; RV32-NEXT: vslidedown.vi v12, v12, 2
+; RV32-NEXT: vand.vi v12, v12, 1
+; RV32-NEXT: vmsne.vi v0, v12, 0
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV32-NEXT: vmerge.vvm v8, v10, v8, v0
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: vse32.v v8, (a3)
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vv_v6f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: lbu a2, 0(a2)
-; RV64-NEXT: vle32.v v8, (a1)
-; RV64-NEXT: andi a1, a2, 1
-; RV64-NEXT: vslide1down.vx v10, v8, a1
-; RV64-NEXT: slli a1, a2, 62
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 61
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 60
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
-; RV64-NEXT: slli a1, a2, 59
-; RV64-NEXT: srli a1, a1, 63
-; RV64-NEXT: vslide1down.vx v10, v10, a1
+; RV64-NEXT: vle32.v v8, (a0)
+; RV64-NEXT: vle32.v v10, (a1)
+; RV64-NEXT: andi a0, a2, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v8, a0
+; RV64-NEXT: slli a0, a2, 62
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: slli a0, a2, 61
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: slli a0, a2, 60
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v12, v12, a0
+; RV64-NEXT: slli a0, a2, 59
+; RV64-NEXT: srli a0, a0, 63
+; RV64-NEXT: vslide1down.vx v12, v12, a0
; RV64-NEXT: srli a2, a2, 5
-; RV64-NEXT: vslide1down.vx v10, v10, a2
-; RV64-NEXT: vslidedown.vi v10, v10, 2
-; RV64-NEXT: vand.vi v10, v10, 1
-; RV64-NEXT: vmsne.vi v0, v10, 0
-; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64-NEXT: vle32.v v8, (a0), v0.t
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: addi a0, a3, 16
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vslide1down.vx v12, v12, a2
+; RV64-NEXT: vslidedown.vi v12, v12, 2
+; RV64-NEXT: vand.vi v12, v12, 1
+; RV64-NEXT: vmsne.vi v0, v12, 0
+; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
+; RV64-NEXT: vmerge.vvm v8, v10, v8, v0
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: vse32.v v8, (a3)
; RV64-NEXT: ret
%va = load <6 x float>, ptr %a
@@ -316,10 +292,11 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) {
define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vx_v6f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: lbu a1, 0(a1)
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: andi a0, a1, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a0
; RV32-NEXT: slli a0, a1, 30
; RV32-NEXT: srli a0, a0, 31
@@ -340,21 +317,17 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: addi a0, a2, 16
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: vse32.v v8, (a2)
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vx_v6f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: lbu a1, 0(a1)
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: andi a0, a1, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslide1down.vx v10, v8, a0
; RV64-NEXT: slli a0, a1, 62
; RV64-NEXT: srli a0, a0, 63
@@ -375,11 +348,7 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vfmerge.vfm v8, v8, fa0, v0
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: addi a0, a2, 16
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: vse32.v v8, (a2)
; RV64-NEXT: ret
%vb = load <6 x float>, ptr %b
@@ -394,10 +363,11 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) {
define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV32-LABEL: vselect_vfpzero_v6f32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: lbu a1, 0(a1)
; RV32-NEXT: vle32.v v8, (a0)
; RV32-NEXT: andi a0, a1, 1
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV32-NEXT: vslide1down.vx v10, v8, a0
; RV32-NEXT: slli a0, a1, 30
; RV32-NEXT: srli a0, a0, 31
@@ -418,21 +388,17 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV32-NEXT: vmsne.vi v0, v10, 0
; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV32-NEXT: vmerge.vim v8, v8, 0, v0
-; RV32-NEXT: vsetivli zero, 2, e32, m2, ta, ma
-; RV32-NEXT: vslidedown.vi v10, v8, 4
-; RV32-NEXT: addi a0, a2, 16
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vse32.v v10, (a0)
-; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV32-NEXT: vse32.v v8, (a2)
; RV32-NEXT: ret
;
; RV64-LABEL: vselect_vfpzero_v6f32:
; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: lbu a1, 0(a1)
; RV64-NEXT: vle32.v v8, (a0)
; RV64-NEXT: andi a0, a1, 1
+; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
; RV64-NEXT: vslide1down.vx v10, v8, a0
; RV64-NEXT: slli a0, a1, 62
; RV64-NEXT: srli a0, a0, 63
@@ -453,11 +419,7 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) {
; RV64-NEXT: vmsne.vi v0, v10, 0
; RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma
; RV64-NEXT: vmerge.vim v8, v8, 0, v0
-; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma
-; RV64-NEXT: vslidedown.vi v10, v8, 2
-; RV64-NEXT: addi a0, a2, 16
-; RV64-NEXT: vse64.v v10, (a0)
-; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma
; RV64-NEXT: vse32.v v8, (a2)
; RV64-NEXT: ret
%vb = load <6 x float>, ptr %b