[llvm] 5ffbdd9 - [RISCV] Handle .vx pseudos in hasAllNBitUsers (#67419)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 27 11:54:06 PDT 2023
Author: Luke Lau
Date: 2023-09-27T19:53:50+01:00
New Revision: 5ffbdd9ed5fb719b354e4a46acc8737c5b624f94
URL: https://github.com/llvm/llvm-project/commit/5ffbdd9ed5fb719b354e4a46acc8737c5b624f94
DIFF: https://github.com/llvm/llvm-project/commit/5ffbdd9ed5fb719b354e4a46acc8737c5b624f94.diff
LOG: [RISCV] Handle .vx pseudos in hasAllNBitUsers (#67419)
Vector pseudos with scalar operands only use the lower SEW bits (or fewer in
the case of shifts and clips). This patch accounts for this in hasAllNBitUsers
for SDNodes in RISCVISelDAGToDAG. We also need to handle it in RISCVOptWInstrs,
otherwise we introduce slliw instructions that are less compressible than their
original slli counterparts.
This is a reland of aff6ffc8760b99cc3d66dd6e251a4f90040c0ab9 with the
refactoring omitted.
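
As an aside, the rule being exploited can be sketched outside of LLVM. The
standalone C++ below is illustrative only (the enum, function name and SEW
encoding are made up for this sketch and are not LLVM APIs): a scalar producer
known to define only N low bits can feed a .vx pseudo exactly when N is at
least the number of bits the pseudo demands, which is what the
Bits < Log2SEW, Bits < Log2SEW + 1 and Bits < (1 << Log2SEW) checks in the
patch test for.

// Standalone sketch (not LLVM code): how many low bits of the scalar
// operand a .vx pseudo reads, mirroring the checks added in this patch.
#include <cstdio>

enum class ScalarUseKind {
  FullSEW,        // e.g. vadd.vx, vand.vx, vmv.s.x: low SEW bits
  ShiftAmount,    // e.g. vsll.vx, vsra.vx: low log2(SEW) bits
  NarrowingShift, // e.g. vnsrl.wx, vnclip.wx: low log2(2*SEW) bits
};

// Log2SEW is 3 for e8, 4 for e16, 5 for e32, 6 for e64.
static unsigned demandedScalarBits(ScalarUseKind Kind, unsigned Log2SEW) {
  switch (Kind) {
  case ScalarUseKind::FullSEW:
    return 1u << Log2SEW; // SEW bits
  case ScalarUseKind::ShiftAmount:
    return Log2SEW;       // log2(SEW) bits
  case ScalarUseKind::NarrowingShift:
    return Log2SEW + 1;   // log2(2*SEW) bits
  }
  return 64; // conservatively assume the whole register is read
}

int main() {
  unsigned Log2SEW = 5; // e32
  // A producer defining Bits low bits qualifies iff
  // Bits >= demandedScalarBits(...); the patch rejects Bits < demanded.
  std::printf("vadd.vx  e32 reads %u low bits\n",
              demandedScalarBits(ScalarUseKind::FullSEW, Log2SEW));
  std::printf("vsll.vx  e32 reads %u low bits\n",
              demandedScalarBits(ScalarUseKind::ShiftAmount, Log2SEW));
  std::printf("vnsrl.wx e32 reads %u low bits\n",
              demandedScalarBits(ScalarUseKind::NarrowingShift, Log2SEW));
}

The effect is visible in the test updates below; for example, the slli/srli
zero-extension pairs in fixed-vectors-reduction-int-vp.ll collapse into a
single andi once the e32 vmv.s.x user is known to read only the low 32 bits
of the scalar.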
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 5a5cd8c33ab369a..283ab1feda7eca5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2753,6 +2753,148 @@ bool RISCVDAGToDAGISel::selectSHXADD_UWOp(SDValue N, unsigned ShAmt,
return false;
}
+static bool vectorPseudoHasAllNBitUsers(SDNode *User, unsigned UserOpNo,
+ unsigned Bits,
+ const TargetInstrInfo *TII) {
+ const RISCVVPseudosTable::PseudoInfo *PseudoInfo =
+ RISCVVPseudosTable::getPseudoInfo(User->getMachineOpcode());
+
+ if (!PseudoInfo)
+ return false;
+
+ const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
+ const uint64_t TSFlags = MCID.TSFlags;
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return false;
+ assert(RISCVII::hasVLOp(TSFlags));
+
+ bool HasGlueOp = User->getGluedNode() != nullptr;
+ unsigned ChainOpIdx = User->getNumOperands() - HasGlueOp - 1;
+ bool HasChainOp = User->getOperand(ChainOpIdx).getValueType() == MVT::Other;
+ bool HasVecPolicyOp = RISCVII::hasVecPolicyOp(TSFlags);
+ unsigned VLIdx =
+ User->getNumOperands() - HasVecPolicyOp - HasChainOp - HasGlueOp - 2;
+ const unsigned Log2SEW = User->getConstantOperandVal(VLIdx + 1);
+
+ if (UserOpNo == VLIdx)
+ return false;
+
+ // TODO: Handle Zvbb instructions
+ switch (PseudoInfo->BaseInstr) {
+ default:
+ return false;
+
+ // 11.6. Vector Single-Width Shift Instructions
+ case RISCV::VSLL_VX:
+ case RISCV::VSRL_VX:
+ case RISCV::VSRA_VX:
+ // 12.4. Vector Single-Width Scaling Shift Instructions
+ case RISCV::VSSRL_VX:
+ case RISCV::VSSRA_VX:
+ // Only the low lg2(SEW) bits of the shift-amount value are used.
+ if (Bits < Log2SEW)
+ return false;
+ break;
+
+ // 11.7 Vector Narrowing Integer Right Shift Instructions
+ case RISCV::VNSRL_WX:
+ case RISCV::VNSRA_WX:
+ // 12.5. Vector Narrowing Fixed-Point Clip Instructions
+ case RISCV::VNCLIPU_WX:
+ case RISCV::VNCLIP_WX:
+ // Only the low lg2(2*SEW) bits of the shift-amount value are used.
+ if (Bits < Log2SEW + 1)
+ return false;
+ break;
+
+ // 11.1. Vector Single-Width Integer Add and Subtract
+ case RISCV::VADD_VX:
+ case RISCV::VSUB_VX:
+ case RISCV::VRSUB_VX:
+ // 11.2. Vector Widening Integer Add/Subtract
+ case RISCV::VWADDU_VX:
+ case RISCV::VWSUBU_VX:
+ case RISCV::VWADD_VX:
+ case RISCV::VWSUB_VX:
+ case RISCV::VWADDU_WX:
+ case RISCV::VWSUBU_WX:
+ case RISCV::VWADD_WX:
+ case RISCV::VWSUB_WX:
+ // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+ case RISCV::VADC_VXM:
+ case RISCV::VADC_VIM:
+ case RISCV::VMADC_VXM:
+ case RISCV::VMADC_VIM:
+ case RISCV::VMADC_VX:
+ case RISCV::VSBC_VXM:
+ case RISCV::VMSBC_VXM:
+ case RISCV::VMSBC_VX:
+ // 11.5 Vector Bitwise Logical Instructions
+ case RISCV::VAND_VX:
+ case RISCV::VOR_VX:
+ case RISCV::VXOR_VX:
+ // 11.8. Vector Integer Compare Instructions
+ case RISCV::VMSEQ_VX:
+ case RISCV::VMSNE_VX:
+ case RISCV::VMSLTU_VX:
+ case RISCV::VMSLT_VX:
+ case RISCV::VMSLEU_VX:
+ case RISCV::VMSLE_VX:
+ case RISCV::VMSGTU_VX:
+ case RISCV::VMSGT_VX:
+ // 11.9. Vector Integer Min/Max Instructions
+ case RISCV::VMINU_VX:
+ case RISCV::VMIN_VX:
+ case RISCV::VMAXU_VX:
+ case RISCV::VMAX_VX:
+ // 11.10. Vector Single-Width Integer Multiply Instructions
+ case RISCV::VMUL_VX:
+ case RISCV::VMULH_VX:
+ case RISCV::VMULHU_VX:
+ case RISCV::VMULHSU_VX:
+ // 11.11. Vector Integer Divide Instructions
+ case RISCV::VDIVU_VX:
+ case RISCV::VDIV_VX:
+ case RISCV::VREMU_VX:
+ case RISCV::VREM_VX:
+ // 11.12. Vector Widening Integer Multiply Instructions
+ case RISCV::VWMUL_VX:
+ case RISCV::VWMULU_VX:
+ case RISCV::VWMULSU_VX:
+ // 11.13. Vector Single-Width Integer Multiply-Add Instructions
+ case RISCV::VMACC_VX:
+ case RISCV::VNMSAC_VX:
+ case RISCV::VMADD_VX:
+ case RISCV::VNMSUB_VX:
+ // 11.14. Vector Widening Integer Multiply-Add Instructions
+ case RISCV::VWMACCU_VX:
+ case RISCV::VWMACC_VX:
+ case RISCV::VWMACCSU_VX:
+ case RISCV::VWMACCUS_VX:
+ // 11.15. Vector Integer Merge Instructions
+ case RISCV::VMERGE_VXM:
+ // 11.16. Vector Integer Move Instructions
+ case RISCV::VMV_V_X:
+ // 12.1. Vector Single-Width Saturating Add and Subtract
+ case RISCV::VSADDU_VX:
+ case RISCV::VSADD_VX:
+ case RISCV::VSSUBU_VX:
+ case RISCV::VSSUB_VX:
+ // 12.2. Vector Single-Width Averaging Add and Subtract
+ case RISCV::VAADDU_VX:
+ case RISCV::VAADD_VX:
+ case RISCV::VASUBU_VX:
+ case RISCV::VASUB_VX:
+ // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+ case RISCV::VSMUL_VX:
+ // 16.1. Integer Scalar Move Instructions
+ case RISCV::VMV_S_X:
+ if (Bits < (1 << Log2SEW))
+ return false;
+ }
+ return true;
+}
+
// Return true if all users of this SDNode* only consume the lower \p Bits.
// This can be used to form W instructions for add/sub/mul/shl even when the
// root isn't a sext_inreg. This can allow the ADDW/SUBW/MULW/SLLIW to CSE if
@@ -2784,6 +2926,8 @@ bool RISCVDAGToDAGISel::hasAllNBitUsers(SDNode *Node, unsigned Bits,
// TODO: Add more opcodes?
switch (User->getMachineOpcode()) {
default:
+ if (vectorPseudoHasAllNBitUsers(User, UI.getOperandNo(), Bits, TII))
+ break;
return false;
case RISCV::ADDW:
case RISCV::ADDIW:
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 3c608bf8b50b57d..0cbdfa84640bf91 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -78,6 +78,141 @@ FunctionPass *llvm::createRISCVOptWInstrsPass() {
return new RISCVOptWInstrs();
}
+static bool vectorPseudoHasAllNBitUsers(const MachineOperand &UserOp,
+ unsigned Bits) {
+ const MachineInstr &MI = *UserOp.getParent();
+ const RISCVVPseudosTable::PseudoInfo *PseudoInfo =
+ RISCVVPseudosTable::getPseudoInfo(MI.getOpcode());
+
+ if (!PseudoInfo)
+ return false;
+
+ const MCInstrDesc &MCID = MI.getDesc();
+ const uint64_t TSFlags = MI.getDesc().TSFlags;
+ if (!RISCVII::hasSEWOp(TSFlags))
+ return false;
+ assert(RISCVII::hasVLOp(TSFlags));
+ const unsigned Log2SEW = MI.getOperand(RISCVII::getSEWOpNum(MCID)).getImm();
+
+ if (UserOp.getOperandNo() == RISCVII::getVLOpNum(MCID))
+ return false;
+
+ // TODO: Handle Zvbb instructions
+ switch (PseudoInfo->BaseInstr) {
+ default:
+ return false;
+
+ // 11.6. Vector Single-Width Shift Instructions
+ case RISCV::VSLL_VX:
+ case RISCV::VSRL_VX:
+ case RISCV::VSRA_VX:
+ // 12.4. Vector Single-Width Scaling Shift Instructions
+ case RISCV::VSSRL_VX:
+ case RISCV::VSSRA_VX:
+ // Only the low lg2(SEW) bits of the shift-amount value are used.
+ if (Bits < Log2SEW)
+ return false;
+ break;
+
+ // 11.7 Vector Narrowing Integer Right Shift Instructions
+ case RISCV::VNSRL_WX:
+ case RISCV::VNSRA_WX:
+ // 12.5. Vector Narrowing Fixed-Point Clip Instructions
+ case RISCV::VNCLIPU_WX:
+ case RISCV::VNCLIP_WX:
+ // Only the low lg2(2*SEW) bits of the shift-amount value are used.
+ if (Bits < Log2SEW + 1)
+ return false;
+ break;
+
+ // 11.1. Vector Single-Width Integer Add and Subtract
+ case RISCV::VADD_VX:
+ case RISCV::VSUB_VX:
+ case RISCV::VRSUB_VX:
+ // 11.2. Vector Widening Integer Add/Subtract
+ case RISCV::VWADDU_VX:
+ case RISCV::VWSUBU_VX:
+ case RISCV::VWADD_VX:
+ case RISCV::VWSUB_VX:
+ case RISCV::VWADDU_WX:
+ case RISCV::VWSUBU_WX:
+ case RISCV::VWADD_WX:
+ case RISCV::VWSUB_WX:
+ // 11.4. Vector Integer Add-with-Carry / Subtract-with-Borrow Instructions
+ case RISCV::VADC_VXM:
+ case RISCV::VADC_VIM:
+ case RISCV::VMADC_VXM:
+ case RISCV::VMADC_VIM:
+ case RISCV::VMADC_VX:
+ case RISCV::VSBC_VXM:
+ case RISCV::VMSBC_VXM:
+ case RISCV::VMSBC_VX:
+ // 11.5 Vector Bitwise Logical Instructions
+ case RISCV::VAND_VX:
+ case RISCV::VOR_VX:
+ case RISCV::VXOR_VX:
+ // 11.8. Vector Integer Compare Instructions
+ case RISCV::VMSEQ_VX:
+ case RISCV::VMSNE_VX:
+ case RISCV::VMSLTU_VX:
+ case RISCV::VMSLT_VX:
+ case RISCV::VMSLEU_VX:
+ case RISCV::VMSLE_VX:
+ case RISCV::VMSGTU_VX:
+ case RISCV::VMSGT_VX:
+ // 11.9. Vector Integer Min/Max Instructions
+ case RISCV::VMINU_VX:
+ case RISCV::VMIN_VX:
+ case RISCV::VMAXU_VX:
+ case RISCV::VMAX_VX:
+ // 11.10. Vector Single-Width Integer Multiply Instructions
+ case RISCV::VMUL_VX:
+ case RISCV::VMULH_VX:
+ case RISCV::VMULHU_VX:
+ case RISCV::VMULHSU_VX:
+ // 11.11. Vector Integer Divide Instructions
+ case RISCV::VDIVU_VX:
+ case RISCV::VDIV_VX:
+ case RISCV::VREMU_VX:
+ case RISCV::VREM_VX:
+ // 11.12. Vector Widening Integer Multiply Instructions
+ case RISCV::VWMUL_VX:
+ case RISCV::VWMULU_VX:
+ case RISCV::VWMULSU_VX:
+ // 11.13. Vector Single-Width Integer Multiply-Add Instructions
+ case RISCV::VMACC_VX:
+ case RISCV::VNMSAC_VX:
+ case RISCV::VMADD_VX:
+ case RISCV::VNMSUB_VX:
+ // 11.14. Vector Widening Integer Multiply-Add Instructions
+ case RISCV::VWMACCU_VX:
+ case RISCV::VWMACC_VX:
+ case RISCV::VWMACCSU_VX:
+ case RISCV::VWMACCUS_VX:
+ // 11.15. Vector Integer Merge Instructions
+ case RISCV::VMERGE_VXM:
+ // 11.16. Vector Integer Move Instructions
+ case RISCV::VMV_V_X:
+ // 12.1. Vector Single-Width Saturating Add and Subtract
+ case RISCV::VSADDU_VX:
+ case RISCV::VSADD_VX:
+ case RISCV::VSSUBU_VX:
+ case RISCV::VSSUB_VX:
+ // 12.2. Vector Single-Width Averaging Add and Subtract
+ case RISCV::VAADDU_VX:
+ case RISCV::VAADD_VX:
+ case RISCV::VASUBU_VX:
+ case RISCV::VASUB_VX:
+ // 12.3. Vector Single-Width Fractional Multiply with Rounding and Saturation
+ case RISCV::VSMUL_VX:
+ // 16.1. Integer Scalar Move Instructions
+ case RISCV::VMV_S_X:
+ if (Bits < (1 << Log2SEW))
+ return false;
+ }
+ return true;
+}
+
// Checks if all users only demand the lower \p OrigBits of the original
// instruction's result.
// TODO: handle multiple interdependent transformations
@@ -108,6 +243,8 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
switch (UserMI->getOpcode()) {
default:
+ if (vectorPseudoHasAllNBitUsers(UserOp, Bits))
+ break;
return false;
case RISCV::ADDIW:
diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
index e3a878052ee19b6..b3f561a52f41143 100644
--- a/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s \
-; RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; These tests check that the scalable-vector version of this series of
; instructions does not get into an infinite DAGCombine loop. This was
@@ -14,26 +12,15 @@
; a constant SPLAT_VECTOR didn't follow suit.
define <2 x i16> @fixedlen(<2 x i32> %x) {
-; RV32-LABEL: fixedlen:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vsrl.vi v8, v8, 16
-; RV32-NEXT: lui a0, 1048568
-; RV32-NEXT: vand.vx v8, v8, a0
-; RV32-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV32-NEXT: vnsrl.wi v8, v8, 0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: fixedlen:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV64-NEXT: vsrl.vi v8, v8, 16
-; RV64-NEXT: lui a0, 131071
-; RV64-NEXT: slli a0, a0, 3
-; RV64-NEXT: vand.vx v8, v8, a0
-; RV64-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
-; RV64-NEXT: vnsrl.wi v8, v8, 0
-; RV64-NEXT: ret
+; CHECK-LABEL: fixedlen:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v8, 16
+; CHECK-NEXT: lui a0, 1048568
+; CHECK-NEXT: vand.vx v8, v8, a0
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: ret
%v41 = insertelement <2 x i32> poison, i32 16, i32 0
%v42 = shufflevector <2 x i32> %v41, <2 x i32> poison, <2 x i32> zeroinitializer
%v43 = lshr <2 x i32> %x, %v42
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index 3167bcf26837b6f..25177734325ce15 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -1,71 +1,39 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s -check-prefixes=CHECK,RV32
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s -check-prefixes=CHECK,RV64
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s
; Integers
define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
-; RV32-LABEL: vector_deinterleave_load_v16i1_v32i1:
-; RV32: # %bb.0:
-; RV32-NEXT: li a1, 32
-; RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; RV32-NEXT: vlm.v v0, (a0)
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vmerge.vim v10, v8, 1, v0
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vadd.vv v11, v9, v9
-; RV32-NEXT: vrgather.vv v9, v10, v11
-; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v0, v0, 2
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; RV32-NEXT: vadd.vi v12, v11, -16
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t
-; RV32-NEXT: vmsne.vi v9, v9, 0
-; RV32-NEXT: vadd.vi v12, v11, 1
-; RV32-NEXT: vrgather.vv v13, v10, v12
-; RV32-NEXT: vadd.vi v10, v11, -15
-; RV32-NEXT: vrgather.vv v13, v8, v10, v0.t
-; RV32-NEXT: vmsne.vi v8, v13, 0
-; RV32-NEXT: vmv.v.v v0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vector_deinterleave_load_v16i1_v32i1:
-; RV64: # %bb.0:
-; RV64-NEXT: li a1, 32
-; RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; RV64-NEXT: vlm.v v0, (a0)
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vim v10, v8, 1, v0
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vadd.vv v11, v9, v9
-; RV64-NEXT: vrgather.vv v9, v10, v11
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64-NEXT: vadd.vi v12, v11, -16
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t
-; RV64-NEXT: vmsne.vi v9, v9, 0
-; RV64-NEXT: vadd.vi v12, v11, 1
-; RV64-NEXT: vrgather.vv v13, v10, v12
-; RV64-NEXT: vadd.vi v10, v11, -15
-; RV64-NEXT: vrgather.vv v13, v8, v10, v0.t
-; RV64-NEXT: vmsne.vi v8, v13, 0
-; RV64-NEXT: vmv.v.v v0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vector_deinterleave_load_v16i1_v32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 32
+; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT: vlm.v v0, (a0)
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vadd.vv v11, v9, v9
+; CHECK-NEXT: vrgather.vv v9, v10, v11
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vadd.vi v12, v11, -16
+; CHECK-NEXT: li a0, -256
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
+; CHECK-NEXT: vmsne.vi v9, v9, 0
+; CHECK-NEXT: vadd.vi v12, v11, 1
+; CHECK-NEXT: vrgather.vv v13, v10, v12
+; CHECK-NEXT: vadd.vi v10, v11, -15
+; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t
+; CHECK-NEXT: vmsne.vi v8, v13, 0
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: ret
%vec = load <32 x i1>, ptr %p
%retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec)
ret {<16 x i1>, <16 x i1>} %retval
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
index 894e96d682871ae..6da5ca06a79c238 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll
@@ -585,8 +585,7 @@ define signext i32 @vpreduce_umax_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %
;
; RV64-LABEL: vpreduce_umax_v2i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
@@ -626,8 +625,7 @@ define signext i32 @vpreduce_umin_v2i32(i32 signext %s, <2 x i32> %v, <2 x i1> %
;
; RV64-LABEL: vpreduce_umin_v2i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
@@ -727,8 +725,7 @@ define signext i32 @vpreduce_umax_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %
;
; RV64-LABEL: vpreduce_umax_v4i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
@@ -768,8 +765,7 @@ define signext i32 @vpreduce_umin_v4i32(i32 signext %s, <4 x i32> %v, <4 x i1> %
;
; RV64-LABEL: vpreduce_umin_v4i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
index be8281181cd2d04..82c9f405c23923d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/named-vector-shuffle-reverse.ll
@@ -1,10 +1,10 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-UNKNOWN
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-256
-; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32-BITS-512
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-UNKNOWN
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-256
-; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64-BITS-512
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-BITS-UNKNOWN
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-BITS-256
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32,RV32-BITS-512
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-BITS-UNKNOWN
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=256 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-BITS-256
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -riscv-v-vector-bits-max=512 -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64,RV64-BITS-512
;
; VECTOR_REVERSE - masks
@@ -65,7 +65,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
@@ -82,7 +82,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 2
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
@@ -97,7 +97,7 @@ define <vscale x 2 x i1> @reverse_nxv2i1(<vscale x 2 x i1> %a) {
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 2
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
@@ -163,7 +163,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m1, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v9, v9, a0
@@ -180,7 +180,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 1
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
@@ -195,7 +195,7 @@ define <vscale x 4 x i1> @reverse_nxv4i1(<vscale x 4 x i1> %a) {
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 1
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
@@ -257,7 +257,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmv.v.i v8, 0
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v10
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
@@ -273,7 +273,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-256-NEXT: vmv.v.i v8, 0
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-256-NEXT: vrgather.vv v10, v8, v9
@@ -287,7 +287,7 @@ define <vscale x 8 x i1> @reverse_nxv8i1(<vscale x 8 x i1> %a) {
; RV64-BITS-512-NEXT: vmv.v.i v8, 0
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v9, v9, a0
; RV64-BITS-512-NEXT: vrgather.vv v10, v8, v9
@@ -353,7 +353,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v12
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0
@@ -370,7 +370,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 1
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v10
; RV64-BITS-256-NEXT: vrsub.vx v10, v10, a0
; RV64-BITS-256-NEXT: vrgather.vv v12, v8, v10
@@ -385,7 +385,7 @@ define <vscale x 16 x i1> @reverse_nxv16i1(<vscale x 16 x i1> %a) {
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 1
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v10
; RV64-BITS-512-NEXT: vrsub.vx v10, v10, a0
; RV64-BITS-512-NEXT: vrgather.vv v12, v8, v10
@@ -451,7 +451,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-UNKNOWN-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v16
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0
@@ -468,7 +468,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 2
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v12
; RV64-BITS-256-NEXT: vrsub.vx v12, v12, a0
; RV64-BITS-256-NEXT: vrgather.vv v16, v8, v12
@@ -483,7 +483,7 @@ define <vscale x 32 x i1> @reverse_nxv32i1(<vscale x 32 x i1> %a) {
; RV64-BITS-512-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 2
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vid.v v12
; RV64-BITS-512-NEXT: vrsub.vx v12, v12, a0
; RV64-BITS-512-NEXT: vrgather.vv v16, v8, v12
@@ -552,7 +552,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v8
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v8, v8, a0
@@ -574,7 +574,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-256-NEXT: vmerge.vim v8, v8, 1, v0
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 3
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vid.v v16
; RV64-BITS-256-NEXT: vrsub.vx v16, v16, a0
; RV64-BITS-256-NEXT: vrgather.vv v24, v8, v16
@@ -586,7 +586,7 @@ define <vscale x 64 x i1> @reverse_nxv64i1(<vscale x 64 x i1> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 2
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; RV64-BITS-512-NEXT: vid.v v8
; RV64-BITS-512-NEXT: vrsub.vx v8, v8, a0
@@ -650,7 +650,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 3
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0
@@ -663,7 +663,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 3
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0
@@ -675,7 +675,7 @@ define <vscale x 1 x i8> @reverse_nxv1i8(<vscale x 1 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 3
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0
@@ -728,7 +728,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0
@@ -741,7 +741,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 2
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0
@@ -753,7 +753,7 @@ define <vscale x 2 x i8> @reverse_nxv2i8(<vscale x 2 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 2
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0
@@ -806,7 +806,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: srli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m1, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v9
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v9, a0
@@ -819,7 +819,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: srli a0, a0, 1
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0
@@ -831,7 +831,7 @@ define <vscale x 4 x i8> @reverse_nxv4i8(<vscale x 4 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: srli a0, a0, 1
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0
@@ -880,7 +880,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
; RV64-BITS-UNKNOWN-LABEL: reverse_nxv8i8:
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m2, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v10
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v10, v10, a0
@@ -892,7 +892,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
; RV64-BITS-256-LABEL: reverse_nxv8i8:
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; RV64-BITS-256-NEXT: vid.v v9
; RV64-BITS-256-NEXT: vrsub.vx v10, v9, a0
@@ -903,7 +903,7 @@ define <vscale x 8 x i8> @reverse_nxv8i8(<vscale x 8 x i8> %a) {
; RV64-BITS-512-LABEL: reverse_nxv8i8:
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m1, ta, ma
; RV64-BITS-512-NEXT: vid.v v9
; RV64-BITS-512-NEXT: vrsub.vx v10, v9, a0
@@ -956,7 +956,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 1
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m4, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v12
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v12, v12, a0
@@ -969,7 +969,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 1
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-BITS-256-NEXT: vid.v v10
; RV64-BITS-256-NEXT: vrsub.vx v12, v10, a0
@@ -981,7 +981,7 @@ define <vscale x 16 x i8> @reverse_nxv16i8(<vscale x 16 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 1
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m2, ta, ma
; RV64-BITS-512-NEXT: vid.v v10
; RV64-BITS-512-NEXT: vrsub.vx v12, v10, a0
@@ -1034,7 +1034,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v16
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v16, v16, a0
@@ -1047,7 +1047,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 2
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; RV64-BITS-256-NEXT: vid.v v12
; RV64-BITS-256-NEXT: vrsub.vx v16, v12, a0
@@ -1059,7 +1059,7 @@ define <vscale x 32 x i8> @reverse_nxv32i8(<vscale x 32 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 2
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; RV64-BITS-512-NEXT: vid.v v12
; RV64-BITS-512-NEXT: vrsub.vx v16, v12, a0
@@ -1114,7 +1114,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
; RV64-BITS-UNKNOWN: # %bb.0:
; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 2
-; RV64-BITS-UNKNOWN-NEXT: addi a0, a0, -1
+; RV64-BITS-UNKNOWN-NEXT: addiw a0, a0, -1
; RV64-BITS-UNKNOWN-NEXT: vsetvli a1, zero, e16, m8, ta, ma
; RV64-BITS-UNKNOWN-NEXT: vid.v v16
; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v16, a0
@@ -1128,7 +1128,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
; RV64-BITS-256: # %bb.0:
; RV64-BITS-256-NEXT: csrr a0, vlenb
; RV64-BITS-256-NEXT: slli a0, a0, 3
-; RV64-BITS-256-NEXT: addi a0, a0, -1
+; RV64-BITS-256-NEXT: addiw a0, a0, -1
; RV64-BITS-256-NEXT: vsetvli a1, zero, e8, m8, ta, ma
; RV64-BITS-256-NEXT: vid.v v16
; RV64-BITS-256-NEXT: vrsub.vx v24, v16, a0
@@ -1140,7 +1140,7 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
; RV64-BITS-512: # %bb.0:
; RV64-BITS-512-NEXT: csrr a0, vlenb
; RV64-BITS-512-NEXT: slli a0, a0, 2
-; RV64-BITS-512-NEXT: addi a0, a0, -1
+; RV64-BITS-512-NEXT: addiw a0, a0, -1
; RV64-BITS-512-NEXT: vsetvli a1, zero, e8, m4, ta, ma
; RV64-BITS-512-NEXT: vid.v v16
; RV64-BITS-512-NEXT: vrsub.vx v24, v16, a0
@@ -1153,175 +1153,305 @@ define <vscale x 64 x i8> @reverse_nxv64i8(<vscale x 64 x i8> %a) {
}
define <vscale x 1 x i16> @reverse_nxv1i16(<vscale x 1 x i16> %a) {
-; CHECK-LABEL: reverse_nxv1i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv1i16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 3
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv1i16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 3
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 1 x i16> @llvm.experimental.vector.reverse.nxv1i16(<vscale x 1 x i16> %a)
ret <vscale x 1 x i16> %res
}
define <vscale x 2 x i16> @reverse_nxv2i16(<vscale x 2 x i16> %a) {
-; CHECK-LABEL: reverse_nxv2i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv2i16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv2i16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 2 x i16> @llvm.experimental.vector.reverse.nxv2i16(<vscale x 2 x i16> %a)
ret <vscale x 2 x i16> %res
}
define <vscale x 4 x i16> @reverse_nxv4i16(<vscale x 4 x i16> %a) {
-; CHECK-LABEL: reverse_nxv4i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv.v.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv.v.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 4 x i16> @llvm.experimental.vector.reverse.nxv4i16(<vscale x 4 x i16> %a)
ret <vscale x 4 x i16> %res
}
define <vscale x 8 x i16> @reverse_nxv8i16(<vscale x 8 x i16> %a) {
-; CHECK-LABEL: reverse_nxv8i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vx v12, v10, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vmv.v.v v8, v10
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv8i16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; RV32-NEXT: vid.v v10
+; RV32-NEXT: vrsub.vx v12, v10, a0
+; RV32-NEXT: vrgather.vv v10, v8, v12
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv8i16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: vrsub.vx v12, v10, a0
+; RV64-NEXT: vrgather.vv v10, v8, v12
+; RV64-NEXT: vmv.v.v v8, v10
+; RV64-NEXT: ret
%res = call <vscale x 8 x i16> @llvm.experimental.vector.reverse.nxv8i16(<vscale x 8 x i16> %a)
ret <vscale x 8 x i16> %res
}
define <vscale x 16 x i16> @reverse_nxv16i16(<vscale x 16 x i16> %a) {
-; CHECK-LABEL: reverse_nxv16i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v16, v12, a0
-; CHECK-NEXT: vrgather.vv v12, v8, v16
-; CHECK-NEXT: vmv.v.v v8, v12
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv16i16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; RV32-NEXT: vid.v v12
+; RV32-NEXT: vrsub.vx v16, v12, a0
+; RV32-NEXT: vrgather.vv v12, v8, v16
+; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv16i16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; RV64-NEXT: vid.v v12
+; RV64-NEXT: vrsub.vx v16, v12, a0
+; RV64-NEXT: vrgather.vv v12, v8, v16
+; RV64-NEXT: vmv.v.v v8, v12
+; RV64-NEXT: ret
%res = call <vscale x 16 x i16> @llvm.experimental.vector.reverse.nxv16i16(<vscale x 16 x i16> %a)
ret <vscale x 16 x i16> %res
}
define <vscale x 32 x i16> @reverse_nxv32i16(<vscale x 32 x i16> %a) {
-; CHECK-LABEL: reverse_nxv32i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vrsub.vx v24, v16, a0
-; CHECK-NEXT: vrgather.vv v16, v8, v24
-; CHECK-NEXT: vmv.v.v v8, v16
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv32i16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; RV32-NEXT: vid.v v16
+; RV32-NEXT: vrsub.vx v24, v16, a0
+; RV32-NEXT: vrgather.vv v16, v8, v24
+; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv32i16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; RV64-NEXT: vid.v v16
+; RV64-NEXT: vrsub.vx v24, v16, a0
+; RV64-NEXT: vrgather.vv v16, v8, v24
+; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: ret
%res = call <vscale x 32 x i16> @llvm.experimental.vector.reverse.nxv32i16(<vscale x 32 x i16> %a)
ret <vscale x 32 x i16> %res
}
define <vscale x 1 x i32> @reverse_nxv1i32(<vscale x 1 x i32> %a) {
-; CHECK-LABEL: reverse_nxv1i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv1i32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 3
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv1i32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 3
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 1 x i32> @llvm.experimental.vector.reverse.nxv1i32(<vscale x 1 x i32> %a)
ret <vscale x 1 x i32> %res
}
define <vscale x 2 x i32> @reverse_nxv2i32(<vscale x 2 x i32> %a) {
-; CHECK-LABEL: reverse_nxv2i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv.v.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv2i32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv2i32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv.v.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 2 x i32> @llvm.experimental.vector.reverse.nxv2i32(<vscale x 2 x i32> %a)
ret <vscale x 2 x i32> %res
}
define <vscale x 4 x i32> @reverse_nxv4i32(<vscale x 4 x i32> %a) {
-; CHECK-LABEL: reverse_nxv4i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vx v12, v10, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vmv.v.v v8, v10
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv4i32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32-NEXT: vid.v v10
+; RV32-NEXT: vrsub.vx v12, v10, a0
+; RV32-NEXT: vrgather.vv v10, v8, v12
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv4i32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: vrsub.vx v12, v10, a0
+; RV64-NEXT: vrgather.vv v10, v8, v12
+; RV64-NEXT: vmv.v.v v8, v10
+; RV64-NEXT: ret
%res = call <vscale x 4 x i32> @llvm.experimental.vector.reverse.nxv4i32(<vscale x 4 x i32> %a)
ret <vscale x 4 x i32> %res
}
define <vscale x 8 x i32> @reverse_nxv8i32(<vscale x 8 x i32> %a) {
-; CHECK-LABEL: reverse_nxv8i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v16, v12, a0
-; CHECK-NEXT: vrgather.vv v12, v8, v16
-; CHECK-NEXT: vmv.v.v v8, v12
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv8i32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32-NEXT: vid.v v12
+; RV32-NEXT: vrsub.vx v16, v12, a0
+; RV32-NEXT: vrgather.vv v12, v8, v16
+; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv8i32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV64-NEXT: vid.v v12
+; RV64-NEXT: vrsub.vx v16, v12, a0
+; RV64-NEXT: vrgather.vv v12, v8, v16
+; RV64-NEXT: vmv.v.v v8, v12
+; RV64-NEXT: ret
%res = call <vscale x 8 x i32> @llvm.experimental.vector.reverse.nxv8i32(<vscale x 8 x i32> %a)
ret <vscale x 8 x i32> %res
}
define <vscale x 16 x i32> @reverse_nxv16i32(<vscale x 16 x i32> %a) {
-; CHECK-LABEL: reverse_nxv16i32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vrsub.vx v24, v16, a0
-; CHECK-NEXT: vrgather.vv v16, v8, v24
-; CHECK-NEXT: vmv.v.v v8, v16
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv16i32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32-NEXT: vid.v v16
+; RV32-NEXT: vrsub.vx v24, v16, a0
+; RV32-NEXT: vrgather.vv v16, v8, v24
+; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv16i32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV64-NEXT: vid.v v16
+; RV64-NEXT: vrsub.vx v24, v16, a0
+; RV64-NEXT: vrgather.vv v16, v8, v24
+; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: ret
%res = call <vscale x 16 x i32> @llvm.experimental.vector.reverse.nxv16i32(<vscale x 16 x i32> %a)
ret <vscale x 16 x i32> %res
}
@@ -1394,175 +1524,305 @@ define <vscale x 8 x i64> @reverse_nxv8i64(<vscale x 8 x i64> %a) {
;
define <vscale x 1 x half> @reverse_nxv1f16(<vscale x 1 x half> %a) {
-; CHECK-LABEL: reverse_nxv1f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv1f16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 3
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv1f16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 3
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 1 x half> @llvm.experimental.vector.reverse.nxv1f16(<vscale x 1 x half> %a)
ret <vscale x 1 x half> %res
}
define <vscale x 2 x half> @reverse_nxv2f16(<vscale x 2 x half> %a) {
-; CHECK-LABEL: reverse_nxv2f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv2f16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv2f16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 2 x half> @llvm.experimental.vector.reverse.nxv2f16(<vscale x 2 x half> %a)
ret <vscale x 2 x half> %res
}
define <vscale x 4 x half> @reverse_nxv4f16(<vscale x 4 x half> %a) {
-; CHECK-LABEL: reverse_nxv4f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv.v.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv4f16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv4f16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, m1, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv.v.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 4 x half> @llvm.experimental.vector.reverse.nxv4f16(<vscale x 4 x half> %a)
ret <vscale x 4 x half> %res
}
define <vscale x 8 x half> @reverse_nxv8f16(<vscale x 8 x half> %a) {
-; CHECK-LABEL: reverse_nxv8f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vx v12, v10, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vmv.v.v v8, v10
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv8f16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; RV32-NEXT: vid.v v10
+; RV32-NEXT: vrsub.vx v12, v10, a0
+; RV32-NEXT: vrgather.vv v10, v8, v12
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv8f16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, m2, ta, ma
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: vrsub.vx v12, v10, a0
+; RV64-NEXT: vrgather.vv v10, v8, v12
+; RV64-NEXT: vmv.v.v v8, v10
+; RV64-NEXT: ret
%res = call <vscale x 8 x half> @llvm.experimental.vector.reverse.nxv8f16(<vscale x 8 x half> %a)
ret <vscale x 8 x half> %res
}
define <vscale x 16 x half> @reverse_nxv16f16(<vscale x 16 x half> %a) {
-; CHECK-LABEL: reverse_nxv16f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v16, v12, a0
-; CHECK-NEXT: vrgather.vv v12, v8, v16
-; CHECK-NEXT: vmv.v.v v8, v12
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv16f16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; RV32-NEXT: vid.v v12
+; RV32-NEXT: vrsub.vx v16, v12, a0
+; RV32-NEXT: vrgather.vv v12, v8, v16
+; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv16f16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, m4, ta, ma
+; RV64-NEXT: vid.v v12
+; RV64-NEXT: vrsub.vx v16, v12, a0
+; RV64-NEXT: vrgather.vv v12, v8, v16
+; RV64-NEXT: vmv.v.v v8, v12
+; RV64-NEXT: ret
%res = call <vscale x 16 x half> @llvm.experimental.vector.reverse.nxv16f16(<vscale x 16 x half> %a)
ret <vscale x 16 x half> %res
}
define <vscale x 32 x half> @reverse_nxv32f16(<vscale x 32 x half> %a) {
-; CHECK-LABEL: reverse_nxv32f16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vrsub.vx v24, v16, a0
-; CHECK-NEXT: vrgather.vv v16, v8, v24
-; CHECK-NEXT: vmv.v.v v8, v16
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv32f16:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 2
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; RV32-NEXT: vid.v v16
+; RV32-NEXT: vrsub.vx v24, v16, a0
+; RV32-NEXT: vrgather.vv v16, v8, v24
+; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv32f16:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 2
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e16, m8, ta, ma
+; RV64-NEXT: vid.v v16
+; RV64-NEXT: vrsub.vx v24, v16, a0
+; RV64-NEXT: vrgather.vv v16, v8, v24
+; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: ret
%res = call <vscale x 32 x half> @llvm.experimental.vector.reverse.nxv32f16(<vscale x 32 x half> %a)
ret <vscale x 32 x half> %res
}
define <vscale x 1 x float> @reverse_nxv1f32(<vscale x 1 x float> %a) {
-; CHECK-LABEL: reverse_nxv1f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 3
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv1r.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv1f32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 3
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv1r.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv1f32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 3
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv1r.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 1 x float> @llvm.experimental.vector.reverse.nxv1f32(<vscale x 1 x float> %a)
ret <vscale x 1 x float> %res
}
define <vscale x 2 x float> @reverse_nxv2f32(<vscale x 2 x float> %a) {
-; CHECK-LABEL: reverse_nxv2f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 2
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vrsub.vx v10, v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10
-; CHECK-NEXT: vmv.v.v v8, v9
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv2f32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 2
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV32-NEXT: vid.v v9
+; RV32-NEXT: vrsub.vx v10, v9, a0
+; RV32-NEXT: vrgather.vv v9, v8, v10
+; RV32-NEXT: vmv.v.v v8, v9
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv2f32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 2
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; RV64-NEXT: vid.v v9
+; RV64-NEXT: vrsub.vx v10, v9, a0
+; RV64-NEXT: vrgather.vv v9, v8, v10
+; RV64-NEXT: vmv.v.v v8, v9
+; RV64-NEXT: ret
%res = call <vscale x 2 x float> @llvm.experimental.vector.reverse.nxv2f32(<vscale x 2 x float> %a)
ret <vscale x 2 x float> %res
}
define <vscale x 4 x float> @reverse_nxv4f32(<vscale x 4 x float> %a) {
-; CHECK-LABEL: reverse_nxv4f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: srli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vrsub.vx v12, v10, a0
-; CHECK-NEXT: vrgather.vv v10, v8, v12
-; CHECK-NEXT: vmv.v.v v8, v10
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv4f32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: srli a0, a0, 1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV32-NEXT: vid.v v10
+; RV32-NEXT: vrsub.vx v12, v10, a0
+; RV32-NEXT: vrgather.vv v10, v8, v12
+; RV32-NEXT: vmv.v.v v8, v10
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv4f32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: srli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; RV64-NEXT: vid.v v10
+; RV64-NEXT: vrsub.vx v12, v10, a0
+; RV64-NEXT: vrgather.vv v10, v8, v12
+; RV64-NEXT: vmv.v.v v8, v10
+; RV64-NEXT: ret
%res = call <vscale x 4 x float> @llvm.experimental.vector.reverse.nxv4f32(<vscale x 4 x float> %a)
ret <vscale x 4 x float> %res
}
define <vscale x 8 x float> @reverse_nxv8f32(<vscale x 8 x float> %a) {
-; CHECK-LABEL: reverse_nxv8f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vrsub.vx v16, v12, a0
-; CHECK-NEXT: vrgather.vv v12, v8, v16
-; CHECK-NEXT: vmv.v.v v8, v12
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv8f32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV32-NEXT: vid.v v12
+; RV32-NEXT: vrsub.vx v16, v12, a0
+; RV32-NEXT: vrgather.vv v12, v8, v16
+; RV32-NEXT: vmv.v.v v8, v12
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv8f32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; RV64-NEXT: vid.v v12
+; RV64-NEXT: vrsub.vx v16, v12, a0
+; RV64-NEXT: vrgather.vv v12, v8, v16
+; RV64-NEXT: vmv.v.v v8, v12
+; RV64-NEXT: ret
%res = call <vscale x 8 x float> @llvm.experimental.vector.reverse.nxv8f32(<vscale x 8 x float> %a)
ret <vscale x 8 x float> %res
}
define <vscale x 16 x float> @reverse_nxv16f32(<vscale x 16 x float> %a) {
-; CHECK-LABEL: reverse_nxv16f32:
-; CHECK: # %bb.0:
-; CHECK-NEXT: csrr a0, vlenb
-; CHECK-NEXT: slli a0, a0, 1
-; CHECK-NEXT: addi a0, a0, -1
-; CHECK-NEXT: vsetvli a1, zero, e32, m8, ta, ma
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vrsub.vx v24, v16, a0
-; CHECK-NEXT: vrgather.vv v16, v8, v24
-; CHECK-NEXT: vmv.v.v v8, v16
-; CHECK-NEXT: ret
+; RV32-LABEL: reverse_nxv16f32:
+; RV32: # %bb.0:
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 1
+; RV32-NEXT: addi a0, a0, -1
+; RV32-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV32-NEXT: vid.v v16
+; RV32-NEXT: vrsub.vx v24, v16, a0
+; RV32-NEXT: vrgather.vv v16, v8, v24
+; RV32-NEXT: vmv.v.v v8, v16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv16f32:
+; RV64: # %bb.0:
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 1
+; RV64-NEXT: addiw a0, a0, -1
+; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma
+; RV64-NEXT: vid.v v16
+; RV64-NEXT: vrsub.vx v24, v16, a0
+; RV64-NEXT: vrgather.vv v16, v8, v24
+; RV64-NEXT: vmv.v.v v8, v16
+; RV64-NEXT: ret
%res = call <vscale x 16 x float> @llvm.experimental.vector.reverse.nxv16f32(<vscale x 16 x float> %a)
ret <vscale x 16 x float> %res
}
@@ -1668,221 +1928,77 @@ define <vscale x 6 x i64> @reverse_nxv6i64(<vscale x 6 x i64> %a) {
}
define <vscale x 12 x i64> @reverse_nxv12i64(<vscale x 12 x i64> %a) {
-; RV32-BITS-UNKNOWN-LABEL: reverse_nxv12i64:
-; RV32-BITS-UNKNOWN: # %bb.0:
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, -80
-; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 80
-; RV32-BITS-UNKNOWN-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; RV32-BITS-UNKNOWN-NEXT: .cfi_offset ra, -4
-; RV32-BITS-UNKNOWN-NEXT: .cfi_offset s0, -8
-; RV32-BITS-UNKNOWN-NEXT: addi s0, sp, 80
-; RV32-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
-; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 4
-; RV32-BITS-UNKNOWN-NEXT: sub sp, sp, a0
-; RV32-BITS-UNKNOWN-NEXT: andi sp, sp, -64
-; RV32-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV32-BITS-UNKNOWN-NEXT: addi a1, a0, -1
-; RV32-BITS-UNKNOWN-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-BITS-UNKNOWN-NEXT: vid.v v24
-; RV32-BITS-UNKNOWN-NEXT: vrsub.vx v24, v24, a1
-; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v0, v16, v24
-; RV32-BITS-UNKNOWN-NEXT: vmv4r.v v16, v4
-; RV32-BITS-UNKNOWN-NEXT: vrgather.vv v0, v8, v24
-; RV32-BITS-UNKNOWN-NEXT: vmv4r.v v20, v0
-; RV32-BITS-UNKNOWN-NEXT: slli a0, a0, 3
-; RV32-BITS-UNKNOWN-NEXT: addi a1, sp, 64
-; RV32-BITS-UNKNOWN-NEXT: add a0, a1, a0
-; RV32-BITS-UNKNOWN-NEXT: vs4r.v v4, (a0)
-; RV32-BITS-UNKNOWN-NEXT: vs8r.v v16, (a1)
-; RV32-BITS-UNKNOWN-NEXT: vl8re64.v v16, (a0)
-; RV32-BITS-UNKNOWN-NEXT: vl8re64.v v8, (a1)
-; RV32-BITS-UNKNOWN-NEXT: addi sp, s0, -80
-; RV32-BITS-UNKNOWN-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; RV32-BITS-UNKNOWN-NEXT: addi sp, sp, 80
-; RV32-BITS-UNKNOWN-NEXT: ret
-;
-; RV32-BITS-256-LABEL: reverse_nxv12i64:
-; RV32-BITS-256: # %bb.0:
-; RV32-BITS-256-NEXT: addi sp, sp, -80
-; RV32-BITS-256-NEXT: .cfi_def_cfa_offset 80
-; RV32-BITS-256-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; RV32-BITS-256-NEXT: .cfi_offset ra, -4
-; RV32-BITS-256-NEXT: .cfi_offset s0, -8
-; RV32-BITS-256-NEXT: addi s0, sp, 80
-; RV32-BITS-256-NEXT: .cfi_def_cfa s0, 0
-; RV32-BITS-256-NEXT: csrr a0, vlenb
-; RV32-BITS-256-NEXT: slli a0, a0, 4
-; RV32-BITS-256-NEXT: sub sp, sp, a0
-; RV32-BITS-256-NEXT: andi sp, sp, -64
-; RV32-BITS-256-NEXT: csrr a0, vlenb
-; RV32-BITS-256-NEXT: addi a1, a0, -1
-; RV32-BITS-256-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-BITS-256-NEXT: vid.v v24
-; RV32-BITS-256-NEXT: vrsub.vx v24, v24, a1
-; RV32-BITS-256-NEXT: vrgather.vv v0, v16, v24
-; RV32-BITS-256-NEXT: vmv4r.v v16, v4
-; RV32-BITS-256-NEXT: vrgather.vv v0, v8, v24
-; RV32-BITS-256-NEXT: vmv4r.v v20, v0
-; RV32-BITS-256-NEXT: slli a0, a0, 3
-; RV32-BITS-256-NEXT: addi a1, sp, 64
-; RV32-BITS-256-NEXT: add a0, a1, a0
-; RV32-BITS-256-NEXT: vs4r.v v4, (a0)
-; RV32-BITS-256-NEXT: vs8r.v v16, (a1)
-; RV32-BITS-256-NEXT: vl8re64.v v16, (a0)
-; RV32-BITS-256-NEXT: vl8re64.v v8, (a1)
-; RV32-BITS-256-NEXT: addi sp, s0, -80
-; RV32-BITS-256-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; RV32-BITS-256-NEXT: addi sp, sp, 80
-; RV32-BITS-256-NEXT: ret
-;
-; RV32-BITS-512-LABEL: reverse_nxv12i64:
-; RV32-BITS-512: # %bb.0:
-; RV32-BITS-512-NEXT: addi sp, sp, -80
-; RV32-BITS-512-NEXT: .cfi_def_cfa_offset 80
-; RV32-BITS-512-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
-; RV32-BITS-512-NEXT: .cfi_offset ra, -4
-; RV32-BITS-512-NEXT: .cfi_offset s0, -8
-; RV32-BITS-512-NEXT: addi s0, sp, 80
-; RV32-BITS-512-NEXT: .cfi_def_cfa s0, 0
-; RV32-BITS-512-NEXT: csrr a0, vlenb
-; RV32-BITS-512-NEXT: slli a0, a0, 4
-; RV32-BITS-512-NEXT: sub sp, sp, a0
-; RV32-BITS-512-NEXT: andi sp, sp, -64
-; RV32-BITS-512-NEXT: csrr a0, vlenb
-; RV32-BITS-512-NEXT: addi a1, a0, -1
-; RV32-BITS-512-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV32-BITS-512-NEXT: vid.v v24
-; RV32-BITS-512-NEXT: vrsub.vx v24, v24, a1
-; RV32-BITS-512-NEXT: vrgather.vv v0, v16, v24
-; RV32-BITS-512-NEXT: vmv4r.v v16, v4
-; RV32-BITS-512-NEXT: vrgather.vv v0, v8, v24
-; RV32-BITS-512-NEXT: vmv4r.v v20, v0
-; RV32-BITS-512-NEXT: slli a0, a0, 3
-; RV32-BITS-512-NEXT: addi a1, sp, 64
-; RV32-BITS-512-NEXT: add a0, a1, a0
-; RV32-BITS-512-NEXT: vs4r.v v4, (a0)
-; RV32-BITS-512-NEXT: vs8r.v v16, (a1)
-; RV32-BITS-512-NEXT: vl8re64.v v16, (a0)
-; RV32-BITS-512-NEXT: vl8re64.v v8, (a1)
-; RV32-BITS-512-NEXT: addi sp, s0, -80
-; RV32-BITS-512-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
-; RV32-BITS-512-NEXT: addi sp, sp, 80
-; RV32-BITS-512-NEXT: ret
-;
-; RV64-BITS-UNKNOWN-LABEL: reverse_nxv12i64:
-; RV64-BITS-UNKNOWN: # %bb.0:
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, -80
-; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa_offset 80
-; RV64-BITS-UNKNOWN-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; RV64-BITS-UNKNOWN-NEXT: .cfi_offset ra, -8
-; RV64-BITS-UNKNOWN-NEXT: .cfi_offset s0, -16
-; RV64-BITS-UNKNOWN-NEXT: addi s0, sp, 80
-; RV64-BITS-UNKNOWN-NEXT: .cfi_def_cfa s0, 0
-; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 4
-; RV64-BITS-UNKNOWN-NEXT: sub sp, sp, a0
-; RV64-BITS-UNKNOWN-NEXT: andi sp, sp, -64
-; RV64-BITS-UNKNOWN-NEXT: csrr a0, vlenb
-; RV64-BITS-UNKNOWN-NEXT: addi a1, a0, -1
-; RV64-BITS-UNKNOWN-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV64-BITS-UNKNOWN-NEXT: vid.v v24
-; RV64-BITS-UNKNOWN-NEXT: vrsub.vx v24, v24, a1
-; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v0, v16, v24
-; RV64-BITS-UNKNOWN-NEXT: vmv4r.v v16, v4
-; RV64-BITS-UNKNOWN-NEXT: vrgather.vv v0, v8, v24
-; RV64-BITS-UNKNOWN-NEXT: vmv4r.v v20, v0
-; RV64-BITS-UNKNOWN-NEXT: slli a0, a0, 3
-; RV64-BITS-UNKNOWN-NEXT: addi a1, sp, 64
-; RV64-BITS-UNKNOWN-NEXT: add a0, a1, a0
-; RV64-BITS-UNKNOWN-NEXT: vs4r.v v4, (a0)
-; RV64-BITS-UNKNOWN-NEXT: vs8r.v v16, (a1)
-; RV64-BITS-UNKNOWN-NEXT: vl8re64.v v16, (a0)
-; RV64-BITS-UNKNOWN-NEXT: vl8re64.v v8, (a1)
-; RV64-BITS-UNKNOWN-NEXT: addi sp, s0, -80
-; RV64-BITS-UNKNOWN-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; RV64-BITS-UNKNOWN-NEXT: addi sp, sp, 80
-; RV64-BITS-UNKNOWN-NEXT: ret
-;
-; RV64-BITS-256-LABEL: reverse_nxv12i64:
-; RV64-BITS-256: # %bb.0:
-; RV64-BITS-256-NEXT: addi sp, sp, -80
-; RV64-BITS-256-NEXT: .cfi_def_cfa_offset 80
-; RV64-BITS-256-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; RV64-BITS-256-NEXT: .cfi_offset ra, -8
-; RV64-BITS-256-NEXT: .cfi_offset s0, -16
-; RV64-BITS-256-NEXT: addi s0, sp, 80
-; RV64-BITS-256-NEXT: .cfi_def_cfa s0, 0
-; RV64-BITS-256-NEXT: csrr a0, vlenb
-; RV64-BITS-256-NEXT: slli a0, a0, 4
-; RV64-BITS-256-NEXT: sub sp, sp, a0
-; RV64-BITS-256-NEXT: andi sp, sp, -64
-; RV64-BITS-256-NEXT: csrr a0, vlenb
-; RV64-BITS-256-NEXT: addi a1, a0, -1
-; RV64-BITS-256-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV64-BITS-256-NEXT: vid.v v24
-; RV64-BITS-256-NEXT: vrsub.vx v24, v24, a1
-; RV64-BITS-256-NEXT: vrgather.vv v0, v16, v24
-; RV64-BITS-256-NEXT: vmv4r.v v16, v4
-; RV64-BITS-256-NEXT: vrgather.vv v0, v8, v24
-; RV64-BITS-256-NEXT: vmv4r.v v20, v0
-; RV64-BITS-256-NEXT: slli a0, a0, 3
-; RV64-BITS-256-NEXT: addi a1, sp, 64
-; RV64-BITS-256-NEXT: add a0, a1, a0
-; RV64-BITS-256-NEXT: vs4r.v v4, (a0)
-; RV64-BITS-256-NEXT: vs8r.v v16, (a1)
-; RV64-BITS-256-NEXT: vl8re64.v v16, (a0)
-; RV64-BITS-256-NEXT: vl8re64.v v8, (a1)
-; RV64-BITS-256-NEXT: addi sp, s0, -80
-; RV64-BITS-256-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; RV64-BITS-256-NEXT: addi sp, sp, 80
-; RV64-BITS-256-NEXT: ret
-;
-; RV64-BITS-512-LABEL: reverse_nxv12i64:
-; RV64-BITS-512: # %bb.0:
-; RV64-BITS-512-NEXT: addi sp, sp, -80
-; RV64-BITS-512-NEXT: .cfi_def_cfa_offset 80
-; RV64-BITS-512-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
-; RV64-BITS-512-NEXT: .cfi_offset ra, -8
-; RV64-BITS-512-NEXT: .cfi_offset s0, -16
-; RV64-BITS-512-NEXT: addi s0, sp, 80
-; RV64-BITS-512-NEXT: .cfi_def_cfa s0, 0
-; RV64-BITS-512-NEXT: csrr a0, vlenb
-; RV64-BITS-512-NEXT: slli a0, a0, 4
-; RV64-BITS-512-NEXT: sub sp, sp, a0
-; RV64-BITS-512-NEXT: andi sp, sp, -64
-; RV64-BITS-512-NEXT: csrr a0, vlenb
-; RV64-BITS-512-NEXT: addi a1, a0, -1
-; RV64-BITS-512-NEXT: vsetvli a2, zero, e64, m8, ta, ma
-; RV64-BITS-512-NEXT: vid.v v24
-; RV64-BITS-512-NEXT: vrsub.vx v24, v24, a1
-; RV64-BITS-512-NEXT: vrgather.vv v0, v16, v24
-; RV64-BITS-512-NEXT: vmv4r.v v16, v4
-; RV64-BITS-512-NEXT: vrgather.vv v0, v8, v24
-; RV64-BITS-512-NEXT: vmv4r.v v20, v0
-; RV64-BITS-512-NEXT: slli a0, a0, 3
-; RV64-BITS-512-NEXT: addi a1, sp, 64
-; RV64-BITS-512-NEXT: add a0, a1, a0
-; RV64-BITS-512-NEXT: vs4r.v v4, (a0)
-; RV64-BITS-512-NEXT: vs8r.v v16, (a1)
-; RV64-BITS-512-NEXT: vl8re64.v v16, (a0)
-; RV64-BITS-512-NEXT: vl8re64.v v8, (a1)
-; RV64-BITS-512-NEXT: addi sp, s0, -80
-; RV64-BITS-512-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
-; RV64-BITS-512-NEXT: addi sp, sp, 80
-; RV64-BITS-512-NEXT: ret
+; RV32-LABEL: reverse_nxv12i64:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -80
+; RV32-NEXT: .cfi_def_cfa_offset 80
+; RV32-NEXT: sw ra, 76(sp) # 4-byte Folded Spill
+; RV32-NEXT: sw s0, 72(sp) # 4-byte Folded Spill
+; RV32-NEXT: .cfi_offset ra, -4
+; RV32-NEXT: .cfi_offset s0, -8
+; RV32-NEXT: addi s0, sp, 80
+; RV32-NEXT: .cfi_def_cfa s0, 0
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: slli a0, a0, 4
+; RV32-NEXT: sub sp, sp, a0
+; RV32-NEXT: andi sp, sp, -64
+; RV32-NEXT: csrr a0, vlenb
+; RV32-NEXT: addi a1, a0, -1
+; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV32-NEXT: vid.v v24
+; RV32-NEXT: vrsub.vx v24, v24, a1
+; RV32-NEXT: vrgather.vv v0, v16, v24
+; RV32-NEXT: vmv4r.v v16, v4
+; RV32-NEXT: vrgather.vv v0, v8, v24
+; RV32-NEXT: vmv4r.v v20, v0
+; RV32-NEXT: slli a0, a0, 3
+; RV32-NEXT: addi a1, sp, 64
+; RV32-NEXT: add a0, a1, a0
+; RV32-NEXT: vs4r.v v4, (a0)
+; RV32-NEXT: vs8r.v v16, (a1)
+; RV32-NEXT: vl8re64.v v16, (a0)
+; RV32-NEXT: vl8re64.v v8, (a1)
+; RV32-NEXT: addi sp, s0, -80
+; RV32-NEXT: lw ra, 76(sp) # 4-byte Folded Reload
+; RV32-NEXT: lw s0, 72(sp) # 4-byte Folded Reload
+; RV32-NEXT: addi sp, sp, 80
+; RV32-NEXT: ret
+;
+; RV64-LABEL: reverse_nxv12i64:
+; RV64: # %bb.0:
+; RV64-NEXT: addi sp, sp, -80
+; RV64-NEXT: .cfi_def_cfa_offset 80
+; RV64-NEXT: sd ra, 72(sp) # 8-byte Folded Spill
+; RV64-NEXT: sd s0, 64(sp) # 8-byte Folded Spill
+; RV64-NEXT: .cfi_offset ra, -8
+; RV64-NEXT: .cfi_offset s0, -16
+; RV64-NEXT: addi s0, sp, 80
+; RV64-NEXT: .cfi_def_cfa s0, 0
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: slli a0, a0, 4
+; RV64-NEXT: sub sp, sp, a0
+; RV64-NEXT: andi sp, sp, -64
+; RV64-NEXT: csrr a0, vlenb
+; RV64-NEXT: addi a1, a0, -1
+; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT: vid.v v24
+; RV64-NEXT: vrsub.vx v24, v24, a1
+; RV64-NEXT: vrgather.vv v0, v16, v24
+; RV64-NEXT: vmv4r.v v16, v4
+; RV64-NEXT: vrgather.vv v0, v8, v24
+; RV64-NEXT: vmv4r.v v20, v0
+; RV64-NEXT: slli a0, a0, 3
+; RV64-NEXT: addi a1, sp, 64
+; RV64-NEXT: add a0, a1, a0
+; RV64-NEXT: vs4r.v v4, (a0)
+; RV64-NEXT: vs8r.v v16, (a1)
+; RV64-NEXT: vl8re64.v v16, (a0)
+; RV64-NEXT: vl8re64.v v8, (a1)
+; RV64-NEXT: addi sp, s0, -80
+; RV64-NEXT: ld ra, 72(sp) # 8-byte Folded Reload
+; RV64-NEXT: ld s0, 64(sp) # 8-byte Folded Reload
+; RV64-NEXT: addi sp, sp, 80
+; RV64-NEXT: ret
%res = call <vscale x 12 x i64> @llvm.experimental.vector.reverse.nxv12i64(<vscale x 12 x i64> %a)
ret <vscale x 12 x i64> %res
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
index 443fe93a618c50b..56d98981947c3c0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/sshl_sat_vec.ll
@@ -32,13 +32,11 @@ define <4 x i32> @vec_v4i32(<4 x i32> %x, <4 x i32> %y) nounwind {
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: addiw a1, a0, -1
; CHECK-NEXT: vsll.vv v10, v8, v9
; CHECK-NEXT: vsra.vv v9, v10, v9
; CHECK-NEXT: vmsne.vv v8, v8, v9
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: slli a0, a0, 31
+; CHECK-NEXT: vmv.v.x v9, a1
; CHECK-NEXT: vmerge.vxm v9, v9, a0, v0
; CHECK-NEXT: vmv.v.v v0, v8
; CHECK-NEXT: vmerge.vvm v8, v10, v9, v0
@@ -116,13 +114,11 @@ define <vscale x 4 x i32> @vec_nxv4i32(<vscale x 4 x i32> %x, <vscale x 4 x i32>
; CHECK-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-NEXT: vmsle.vi v0, v8, -1
; CHECK-NEXT: lui a0, 524288
-; CHECK-NEXT: addiw a0, a0, -1
+; CHECK-NEXT: addiw a1, a0, -1
; CHECK-NEXT: vsll.vv v12, v8, v10
; CHECK-NEXT: vsra.vv v14, v12, v10
; CHECK-NEXT: vmsne.vv v10, v8, v14
-; CHECK-NEXT: vmv.v.x v8, a0
-; CHECK-NEXT: li a0, 1
-; CHECK-NEXT: slli a0, a0, 31
+; CHECK-NEXT: vmv.v.x v8, a1
; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0
; CHECK-NEXT: vmv1r.v v0, v10
; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index c674670d8d030a9..f7ccf2c32cde0cd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -1,65 +1,36 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck --check-prefixes=CHECK,RV32 %s
-; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck --check-prefixes=CHECK,RV64 %s
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zfh,+zvfh | FileCheck %s
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zfh,+zvfh | FileCheck %s
; Integers
define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
-; RV32-LABEL: vector_deinterleave_v16i1_v32i1:
-; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmv.v.i v8, 0
-; RV32-NEXT: vmerge.vim v10, v8, 1, v0
-; RV32-NEXT: vid.v v9
-; RV32-NEXT: vadd.vv v11, v9, v9
-; RV32-NEXT: vrgather.vv v9, v10, v11
-; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV32-NEXT: vslidedown.vi v0, v0, 2
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV32-NEXT: vmerge.vim v8, v8, 1, v0
-; RV32-NEXT: vadd.vi v12, v11, -16
-; RV32-NEXT: lui a0, 16
-; RV32-NEXT: addi a0, a0, -256
-; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV32-NEXT: vmv.s.x v0, a0
-; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV32-NEXT: vrgather.vv v9, v8, v12, v0.t
-; RV32-NEXT: vmsne.vi v9, v9, 0
-; RV32-NEXT: vadd.vi v12, v11, 1
-; RV32-NEXT: vrgather.vv v13, v10, v12
-; RV32-NEXT: vadd.vi v10, v11, -15
-; RV32-NEXT: vrgather.vv v13, v8, v10, v0.t
-; RV32-NEXT: vmsne.vi v8, v13, 0
-; RV32-NEXT: vmv.v.v v0, v9
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vector_deinterleave_v16i1_v32i1:
-; RV64: # %bb.0:
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmv.v.i v8, 0
-; RV64-NEXT: vmerge.vim v10, v8, 1, v0
-; RV64-NEXT: vid.v v9
-; RV64-NEXT: vadd.vv v11, v9, v9
-; RV64-NEXT: vrgather.vv v9, v10, v11
-; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT: vslidedown.vi v0, v0, 2
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma
-; RV64-NEXT: vmerge.vim v8, v8, 1, v0
-; RV64-NEXT: vadd.vi v12, v11, -16
-; RV64-NEXT: lui a0, 16
-; RV64-NEXT: addiw a0, a0, -256
-; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
-; RV64-NEXT: vmv.s.x v0, a0
-; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, mu
-; RV64-NEXT: vrgather.vv v9, v8, v12, v0.t
-; RV64-NEXT: vmsne.vi v9, v9, 0
-; RV64-NEXT: vadd.vi v12, v11, 1
-; RV64-NEXT: vrgather.vv v13, v10, v12
-; RV64-NEXT: vadd.vi v10, v11, -15
-; RV64-NEXT: vrgather.vv v13, v8, v10, v0.t
-; RV64-NEXT: vmsne.vi v8, v13, 0
-; RV64-NEXT: vmv.v.v v0, v9
-; RV64-NEXT: ret
+; CHECK-LABEL: vector_deinterleave_v16i1_v32i1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vmerge.vim v10, v8, 1, v0
+; CHECK-NEXT: vid.v v9
+; CHECK-NEXT: vadd.vv v11, v9, v9
+; CHECK-NEXT: vrgather.vv v9, v10, v11
+; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
+; CHECK-NEXT: vslidedown.vi v0, v0, 2
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma
+; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
+; CHECK-NEXT: vadd.vi v12, v11, -16
+; CHECK-NEXT: li a0, -256
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT: vrgather.vv v9, v8, v12, v0.t
+; CHECK-NEXT: vmsne.vi v9, v9, 0
+; CHECK-NEXT: vadd.vi v12, v11, 1
+; CHECK-NEXT: vrgather.vv v13, v10, v12
+; CHECK-NEXT: vadd.vi v10, v11, -15
+; CHECK-NEXT: vrgather.vv v13, v8, v10, v0.t
+; CHECK-NEXT: vmsne.vi v8, v13, 0
+; CHECK-NEXT: vmv.v.v v0, v9
+; CHECK-NEXT: ret
%retval = call {<16 x i1>, <16 x i1>} @llvm.experimental.vector.deinterleave2.v32i1(<32 x i1> %vec)
ret {<16 x i1>, <16 x i1>} %retval
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
index 06f5d39622da834..0fda7909df3134f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulh-sdnode.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV32
-; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,RV64
+; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s
; Test that the prepareSREMEqFold optimization doesn't crash on scalable
; vector types.
@@ -60,21 +60,12 @@ define <vscale x 1 x i32> @vmulh_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %x) {
}
define <vscale x 1 x i32> @vmulh_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vmulh_vi_nxv1i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulh_vi_nxv1i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulh_vi_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 1 x i32> %head1, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
%vb = sext <vscale x 1 x i32> %splat1 to <vscale x 1 x i64>
@@ -141,21 +132,12 @@ define <vscale x 2 x i32> @vmulh_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %x) {
}
define <vscale x 2 x i32> @vmulh_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vmulh_vi_nxv2i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulh_vi_nxv2i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulh_vi_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 2 x i32> %head1, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%vb = sext <vscale x 2 x i32> %splat1 to <vscale x 2 x i64>
@@ -222,21 +204,12 @@ define <vscale x 4 x i32> @vmulh_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %x) {
}
define <vscale x 4 x i32> @vmulh_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vmulh_vi_nxv4i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulh_vi_nxv4i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulh_vi_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 4 x i32> %head1, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%vb = sext <vscale x 4 x i32> %splat1 to <vscale x 4 x i64>
@@ -303,21 +276,12 @@ define <vscale x 8 x i32> @vmulh_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %x) {
}
define <vscale x 8 x i32> @vmulh_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vmulh_vi_nxv8i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmulh.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulh_vi_nxv8i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulh.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulh_vi_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmulh.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
%vb = sext <vscale x 8 x i32> %splat1 to <vscale x 8 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
index 186d56b1293dbef..5354c17fd2a7d7c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vmulhu-sdnode.ll
@@ -37,21 +37,12 @@ define <vscale x 1 x i32> @vmulhu_vx_nxv1i32(<vscale x 1 x i32> %va, i32 %x) {
}
define <vscale x 1 x i32> @vmulhu_vi_nxv1i32_0(<vscale x 1 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv1i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv1i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv1i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 1 x i32> %head1, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
%vb = zext <vscale x 1 x i32> %splat1 to <vscale x 1 x i64>
@@ -124,21 +115,12 @@ define <vscale x 2 x i32> @vmulhu_vx_nxv2i32(<vscale x 2 x i32> %va, i32 %x) {
}
define <vscale x 2 x i32> @vmulhu_vi_nxv2i32_0(<vscale x 2 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv2i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv2i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m1, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv2i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 2 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 2 x i32> %head1, <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
%vb = zext <vscale x 2 x i32> %splat1 to <vscale x 2 x i64>
@@ -211,21 +193,12 @@ define <vscale x 4 x i32> @vmulhu_vx_nxv4i32(<vscale x 4 x i32> %va, i32 %x) {
}
define <vscale x 4 x i32> @vmulhu_vi_nxv4i32_0(<vscale x 4 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv4i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv4i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m2, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 4 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 4 x i32> %head1, <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
%vb = zext <vscale x 4 x i32> %splat1 to <vscale x 4 x i64>
@@ -298,21 +271,12 @@ define <vscale x 8 x i32> @vmulhu_vx_nxv8i32(<vscale x 8 x i32> %va, i32 %x) {
}
define <vscale x 8 x i32> @vmulhu_vi_nxv8i32_0(<vscale x 8 x i32> %va) {
-; RV32-LABEL: vmulhu_vi_nxv8i32_0:
-; RV32: # %bb.0:
-; RV32-NEXT: li a0, -7
-; RV32-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT: vmulhu.vx v8, v8, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: vmulhu_vi_nxv8i32_0:
-; RV64: # %bb.0:
-; RV64-NEXT: li a0, 1
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: addi a0, a0, -7
-; RV64-NEXT: vsetvli a1, zero, e32, m4, ta, ma
-; RV64-NEXT: vmulhu.vx v8, v8, a0
-; RV64-NEXT: ret
+; CHECK-LABEL: vmulhu_vi_nxv8i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a0, -7
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vmulhu.vx v8, v8, a0
+; CHECK-NEXT: ret
%head1 = insertelement <vscale x 8 x i32> poison, i32 -7, i32 0
%splat1 = shufflevector <vscale x 8 x i32> %head1, <vscale x 8 x i32> poison, <vscale x 8 x i32> zeroinitializer
%vb = zext <vscale x 8 x i32> %splat1 to <vscale x 8 x i64>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
index ba7f3e801aa0730..6a8fe57f23f66a8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-int-vp.ll
@@ -852,8 +852,7 @@ define signext i32 @vpreduce_umax_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v,
;
; RV64-LABEL: vpreduce_umax_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
@@ -893,8 +892,7 @@ define signext i32 @vpreduce_umin_nxv1i32(i32 signext %s, <vscale x 1 x i32> %v,
;
; RV64-LABEL: vpreduce_umin_nxv1i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
@@ -994,8 +992,7 @@ define signext i32 @vpreduce_umax_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v,
;
; RV64-LABEL: vpreduce_umax_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
@@ -1035,8 +1032,7 @@ define signext i32 @vpreduce_umin_nxv2i32(i32 signext %s, <vscale x 2 x i32> %v,
;
; RV64-LABEL: vpreduce_umin_nxv2i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v9, a0
; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma
@@ -1136,8 +1132,7 @@ define signext i32 @vpreduce_umax_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v,
;
; RV64-LABEL: vpreduce_umax_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
@@ -1182,8 +1177,7 @@ define signext i32 @vpreduce_umax_nxv32i32(i32 signext %s, <vscale x 32 x i32> %
; RV64-NEXT: srli a2, a3, 2
; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma
; RV64-NEXT: vslidedown.vx v24, v0, a2
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a2, a0, 32
+; RV64-NEXT: andi a2, a0, -1
; RV64-NEXT: slli a3, a3, 1
; RV64-NEXT: sub a0, a1, a3
; RV64-NEXT: sltu a4, a1, a0
@@ -1235,8 +1229,7 @@ define signext i32 @vpreduce_umin_nxv4i32(i32 signext %s, <vscale x 4 x i32> %v,
;
; RV64-LABEL: vpreduce_umin_nxv4i32:
; RV64: # %bb.0:
-; RV64-NEXT: slli a0, a0, 32
-; RV64-NEXT: srli a0, a0, 32
+; RV64-NEXT: andi a0, a0, -1
; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; RV64-NEXT: vmv.s.x v10, a0
; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma
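
For readers who want to reproduce the new .vx codegen without checking out the whole test suite, here is a reduced sketch distilled from the vmulh-sdnode.ll diff above. The function and value names are made up for illustration; the IR simply mirrors the splat/sext/mul/lshr/trunc pattern those tests use. Feeding it to the RV64 RUN line shown in that file (llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs) should, with this patch applied, give a sequence close to the updated CHECK lines (li a0, -7 followed by vmulh.vx) rather than materializing a wider constant, although exact register choices may differ.

; Reduced reproducer (hypothetical names), adapted from vmulh-sdnode.ll above.
define <vscale x 1 x i32> @vmulh_vi_reduced(<vscale x 1 x i32> %va) {
  ; Splat the constant -7 and sign-extend both operands to i64.
  %head = insertelement <vscale x 1 x i32> poison, i32 -7, i32 0
  %splat = shufflevector <vscale x 1 x i32> %head, <vscale x 1 x i32> poison, <vscale x 1 x i32> zeroinitializer
  %vb = sext <vscale x 1 x i32> %splat to <vscale x 1 x i64>
  %vc = sext <vscale x 1 x i32> %va to <vscale x 1 x i64>
  ; Widening multiply, take the high 32 bits, truncate back: the usual mulhs pattern.
  %vd = mul <vscale x 1 x i64> %vb, %vc
  %shead = insertelement <vscale x 1 x i64> poison, i64 32, i32 0
  %ssplat = shufflevector <vscale x 1 x i64> %shead, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %ve = lshr <vscale x 1 x i64> %vd, %ssplat
  %vf = trunc <vscale x 1 x i64> %ve to <vscale x 1 x i32>
  ret <vscale x 1 x i32> %vf
}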