[llvm] 8b4c80d - Further improve register allocation for vwadd(u).wv, vwsub(u).wv, vfwadd.wv, and vfwsub.wv.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 8 09:43:53 PDT 2021
Author: Craig Topper
Date: 2021-06-08T09:43:43-07:00
New Revision: 8b4c80d380a681e6ea6ea60e9d9f9424e7782980
URL: https://github.com/llvm/llvm-project/commit/8b4c80d380a681e6ea6ea60e9d9f9424e7782980
DIFF: https://github.com/llvm/llvm-project/commit/8b4c80d380a681e6ea6ea60e9d9f9424e7782980.diff
LOG: Further improve register allocation for vwadd(u).wv, vwsub(u).wv, vfwadd.wv, and vfwsub.wv.
The first source has the same EEW as the destination, but we're
using earlyclobber, which prevents them from ever being allocated to
the same register. This patch attempts to work around that.
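As an aside, here is a minimal C++ sketch of what that earlyclobber
constraint looks like from the MC layer. The helper below is invented
for illustration and is not part of the patch:

#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

// Illustration only. For the plain (untied) .wv pseudos, the
// @earlyclobber constraint on $rd is recorded on operand 0 of the
// MCInstrDesc; it is what forces the register allocator to pick a
// result register disjoint from every source, even though the first
// source already has the destination's EEW and could share it.
static bool destIsEarlyClobber(const MCInstrDesc &Desc) {
  // getOperandConstraint returns -1 when the constraint is absent.
  return Desc.getOperandConstraint(0, MCOI::EARLY_CLOBBER) != -1;
}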
-For unmasked .wv, add a special TIED pseudo that pretends that
the first source operand and the destination must be the same
register. This disables the earlyclobber for that source. Mark the
instruction as convertible to 3-address form, which switches it back
to the original untied pseudo when the TwoAddressInstructionPass
decides that keeping them tied would require an extra copy. The
conversion to the untied opcode is done by new code in
RISCVInstrInfo.cpp.
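As a rough, hypothetical sketch of how the pieces are meant to fit
together (the helper names below are invented; the real decision
lives in TwoAddressInstructionPass and the new convertToThreeAddress
hook, not in this snippet):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

// In the _TIED pseudo the first source ($rs2, MI operand 1) is tied
// to the destination, so the allocator can simply reuse its register
// for the result and no vmv copy is needed.
static bool firstSourceTiedToDest(const MCInstrDesc &Desc) {
  return Desc.getOperandConstraint(1, MCOI::TIED_TO) == 0;
}

// If the tied source is still live after the instruction, honoring
// the tie would make TwoAddressInstructionPass insert a copy; that is
// roughly when it is expected to call convertToThreeAddress() and
// fall back to the untied, earlyclobber form. Treating "not killed
// here" as the trigger is a simplification of the pass's real
// heuristics.
static bool untieWouldAvoidCopy(const MachineInstr &MI) {
  const MachineOperand &Src = MI.getOperand(1);
  return Src.isReg() && !Src.isKill();
}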
The untie test cases show that we can generate the untied version.
It's not clear that untying was profitable in these cases, but they
have really simple IR.
Reviewed By: arcbbb
Differential Revision: https://reviews.llvm.org/D103552
Added:
Modified:
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
llvm/lib/Target/RISCV/RISCVInstrInfo.h
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll
llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll
llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll
llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 93e65cdada46..a14cd4a4ccb0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -1353,6 +1354,86 @@ MachineInstr *RISCVInstrInfo::commuteInstructionImpl(MachineInstr &MI,
#undef CASE_VFMA_OPCODE_LMULS
#undef CASE_VFMA_OPCODE_COMMON
+// clang-format off
+#define CASE_WIDEOP_OPCODE_COMMON(OP, LMUL) \
+ RISCV::PseudoV##OP##_##LMUL##_TIED
+
+#define CASE_WIDEOP_OPCODE_LMULS(OP) \
+ CASE_WIDEOP_OPCODE_COMMON(OP, MF8): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, MF4): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, MF2): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, M1): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, M2): \
+ case CASE_WIDEOP_OPCODE_COMMON(OP, M4)
+// clang-format on
+
+#define CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, LMUL) \
+ case RISCV::PseudoV##OP##_##LMUL##_TIED: \
+ NewOpc = RISCV::PseudoV##OP##_##LMUL; \
+ break;
+
+#define CASE_WIDEOP_CHANGE_OPCODE_LMULS(OP) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF8) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF4) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, MF2) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M1) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M2) \
+ CASE_WIDEOP_CHANGE_OPCODE_COMMON(OP, M4)
+
+MachineInstr *RISCVInstrInfo::convertToThreeAddress(
+ MachineFunction::iterator &MBB, MachineInstr &MI, LiveVariables *LV) const {
+ switch (MI.getOpcode()) {
+ default:
+ break;
+ case CASE_WIDEOP_OPCODE_LMULS(FWADD_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(FWSUB_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(WADD_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(WADDU_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(WSUB_WV):
+ case CASE_WIDEOP_OPCODE_LMULS(WSUBU_WV): {
+ // clang-format off
+ unsigned NewOpc;
+ switch (MI.getOpcode()) {
+ default:
+ llvm_unreachable("Unexpected opcode");
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWADD_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(FWSUB_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADD_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(WADDU_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUB_WV)
+ CASE_WIDEOP_CHANGE_OPCODE_LMULS(WSUBU_WV)
+ }
+ //clang-format on
+
+ MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI.getDebugLoc(), get(NewOpc))
+ .add(MI.getOperand(0))
+ .add(MI.getOperand(1))
+ .add(MI.getOperand(2))
+ .add(MI.getOperand(3))
+ .add(MI.getOperand(4));
+ MIB.copyImplicitOps(MI);
+
+ if (LV) {
+ unsigned NumOps = MI.getNumOperands();
+ for (unsigned I = 1; I < NumOps; ++I) {
+ MachineOperand &Op = MI.getOperand(I);
+ if (Op.isReg() && Op.isKill())
+ LV->replaceKillInstruction(Op.getReg(), MI, *MIB);
+ }
+ }
+
+ return MIB;
+ }
+ }
+
+ return nullptr;
+}
+
+#undef CASE_WIDEOP_CHANGE_OPCODE_LMULS
+#undef CASE_WIDEOP_CHANGE_OPCODE_COMMON
+#undef CASE_WIDEOP_OPCODE_LMULS
+#undef CASE_WIDEOP_OPCODE_COMMON
+
Register RISCVInstrInfo::getVLENFactoredAmount(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator II,
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 74dc2786ba97..905fe692975b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -143,6 +143,10 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
unsigned OpIdx1,
unsigned OpIdx2) const override;
+ MachineInstr *convertToThreeAddress(MachineFunction::iterator &MBB,
+ MachineInstr &MI,
+ LiveVariables *LV) const override;
+
Register getVLENFactoredAmount(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator II,
const DebugLoc &DL, int64_t Amount) const;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 9a1c32262a4b..deb3cc96b37b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -909,6 +909,24 @@ class VPseudoBinaryNoMask<VReg RetClass,
let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
}
+class VPseudoTiedBinaryNoMask<VReg RetClass,
+ DAGOperand Op2Class,
+ string Constraint> :
+ Pseudo<(outs RetClass:$rd),
+ (ins RetClass:$rs2, Op2Class:$rs1, AVL:$vl, ixlenimm:$sew), []>,
+ RISCVVPseudo {
+ let mayLoad = 0;
+ let mayStore = 0;
+ let hasSideEffects = 0;
+ let Constraints = Join<[Constraint, "$rd = $rs2"], ",">.ret;
+ let HasVLOp = 1;
+ let HasSEWOp = 1;
+ let HasDummyMask = 1;
+ let ForceTailAgnostic = 1;
+ let isConvertibleToThreeAddress = 1;
+ let BaseInstr = !cast<Instruction>(PseudoToVInst<NAME>.VInst);
+}
+
class VPseudoIStoreNoMask<VReg StClass, VReg IdxClass, int EEW, bits<3> LMUL,
bit Ordered>:
Pseudo<(outs),
@@ -1511,6 +1529,8 @@ multiclass VPseudoTiedBinary<VReg RetClass,
LMULInfo MInfo,
string Constraint = ""> {
let VLMul = MInfo.value in {
+ def "_" # MInfo.MX # "_TIED": VPseudoTiedBinaryNoMask<RetClass, Op2Class,
+ Constraint>;
def "_" # MInfo.MX # "_MASK_TIED" : VPseudoTiedBinaryMask<RetClass, Op2Class,
Constraint>;
}
@@ -2296,6 +2316,22 @@ class VPatBinaryMaskSwapped<string intrinsic_name,
(op2_type op2_kind:$rs2),
(mask_type V0), GPR:$vl, sew)>;
+class VPatTiedBinaryNoMask<string intrinsic_name,
+ string inst,
+ ValueType result_type,
+ ValueType op2_type,
+ int sew,
+ VReg result_reg_class,
+ DAGOperand op2_kind> :
+ Pat<(result_type (!cast<Intrinsic>(intrinsic_name)
+ (result_type result_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ VLOpFrag)),
+ (!cast<Instruction>(inst#"_TIED")
+ (result_type result_reg_class:$rs1),
+ (op2_type op2_kind:$rs2),
+ GPR:$vl, sew)>;
+
class VPatTiedBinaryMask<string intrinsic_name,
string inst,
ValueType result_type,
@@ -2697,14 +2733,17 @@ multiclass VPatBinaryW_WV<string intrinsic, string instruction,
foreach VtiToWti = vtilist in {
defvar Vti = VtiToWti.Vti;
defvar Wti = VtiToWti.Wti;
+ def : VPatTiedBinaryNoMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Vti.Vector,
+ Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
let AddedComplexity = 1 in
def : VPatTiedBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
Wti.Vector, Vti.Vector, Vti.Mask,
Vti.Log2SEW, Wti.RegClass, Vti.RegClass>;
- defm : VPatBinary<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
- Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
- Vti.Log2SEW, Wti.RegClass,
- Wti.RegClass, Vti.RegClass>;
+ def : VPatBinaryMask<intrinsic, instruction # "_WV_" # Vti.LMul.MX,
+ Wti.Vector, Wti.Vector, Vti.Vector, Vti.Mask,
+ Vti.Log2SEW, Wti.RegClass,
+ Wti.RegClass, Vti.RegClass>;
}
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
index bcbd3064454a..7bd331c02166 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16(<vsca
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
@@ -55,8 +54,7 @@ define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16(<vsca
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
@@ -100,8 +98,7 @@ define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16(<vsca
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vfwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
@@ -145,8 +142,7 @@ define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16(<vsca
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vfwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
@@ -190,8 +186,7 @@ define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16(<
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vfwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16(
@@ -236,8 +231,7 @@ define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32(<vsc
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
@@ -281,8 +275,7 @@ define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32(<vsc
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vfwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
@@ -326,8 +319,7 @@ define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32(<vsc
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vfwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
@@ -371,8 +363,7 @@ define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32(<vsc
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vfwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
@@ -1136,3 +1127,131 @@ entry:
ret <vscale x 8 x double> %a
}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
+ <vscale x 1 x float> %1,
+ <vscale x 1 x half> %0,
+ i32 %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
+ <vscale x 2 x float> %1,
+ <vscale x 2 x half> %0,
+ i32 %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
+ <vscale x 4 x float> %1,
+ <vscale x 4 x half> %0,
+ i32 %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
+ <vscale x 8 x float> %1,
+ <vscale x 8 x half> %0,
+ i32 %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
+ <vscale x 1 x double> %1,
+ <vscale x 1 x float> %0,
+ i32 %2)
+
+ ret <vscale x 1 x double> %a
+}
+
+define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
+ <vscale x 2 x double> %1,
+ <vscale x 2 x float> %0,
+ i32 %2)
+
+ ret <vscale x 2 x double> %a
+}
+
+define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
+ <vscale x 4 x double> %1,
+ <vscale x 4 x float> %0,
+ i32 %2)
+
+ ret <vscale x 4 x double> %a
+}
+
+define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
+ <vscale x 8 x double> %1,
+ <vscale x 8 x float> %0,
+ i32 %2)
+
+ ret <vscale x 8 x double> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
index a4a499597f44..e73980c8155f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwadd.w-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16(<vsca
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f32_nxv1f32_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
@@ -55,8 +54,7 @@ define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16(<vsca
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f32_nxv2f32_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
@@ -100,8 +98,7 @@ define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16(<vsca
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vfwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
@@ -145,8 +142,7 @@ define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16(<vsca
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vfwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
@@ -190,8 +186,7 @@ define <vscale x 16 x float> @intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16(<
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv16f32_nxv16f32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vfwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwadd.w.nxv16f32.nxv16f16(
@@ -236,8 +231,7 @@ define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32(<vsc
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv1f64_nxv1f64_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vfwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
@@ -281,8 +275,7 @@ define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32(<vsc
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vfwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
@@ -326,8 +319,7 @@ define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32(<vsc
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vfwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
@@ -371,8 +363,7 @@ define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32(<vsc
; CHECK-LABEL: intrinsic_vfwadd.w_wv_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vfwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
@@ -1136,3 +1127,131 @@ entry:
ret <vscale x 8 x double> %a
}
+
+define <vscale x 1 x float> @intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwadd.w.nxv1f32.nxv1f16(
+ <vscale x 1 x float> %1,
+ <vscale x 1 x half> %0,
+ i64 %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwadd.w.nxv2f32.nxv2f16(
+ <vscale x 2 x float> %1,
+ <vscale x 2 x half> %0,
+ i64 %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwadd.w.nxv4f32.nxv4f16(
+ <vscale x 4 x float> %1,
+ <vscale x 4 x half> %0,
+ i64 %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwadd.w.nxv8f32.nxv8f16(
+ <vscale x 8 x float> %1,
+ <vscale x 8 x half> %0,
+ i64 %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 1 x double> @intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfwadd.w.nxv1f64.nxv1f32(
+ <vscale x 1 x double> %1,
+ <vscale x 1 x float> %0,
+ i64 %2)
+
+ ret <vscale x 1 x double> %a
+}
+
+define <vscale x 2 x double> @intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfwadd.w.nxv2f64.nxv2f32(
+ <vscale x 2 x double> %1,
+ <vscale x 2 x float> %0,
+ i64 %2)
+
+ ret <vscale x 2 x double> %a
+}
+
+define <vscale x 4 x double> @intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfwadd.w.nxv4f64.nxv4f32(
+ <vscale x 4 x double> %1,
+ <vscale x 4 x float> %0,
+ i64 %2)
+
+ ret <vscale x 4 x double> %a
+}
+
+define <vscale x 8 x double> @intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwadd.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfwadd.w.nxv8f64.nxv8f32(
+ <vscale x 8 x double> %1,
+ <vscale x 8 x float> %0,
+ i64 %2)
+
+ ret <vscale x 8 x double> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
index 69b64abb7d0b..b291f7331db2 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16(<vsca
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
@@ -55,8 +54,7 @@ define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16(<vsca
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
@@ -100,8 +98,7 @@ define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16(<vsca
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vfwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
@@ -145,8 +142,7 @@ define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16(<vsca
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vfwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
@@ -190,8 +186,7 @@ define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16(<
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vfwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16(
@@ -236,8 +231,7 @@ define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32(<vsc
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
@@ -281,8 +275,7 @@ define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32(<vsc
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vfwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
@@ -326,8 +319,7 @@ define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32(<vsc
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vfwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
@@ -371,8 +363,7 @@ define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32(<vsc
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vfwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
@@ -1136,3 +1127,131 @@ entry:
ret <vscale x 8 x double> %a
}
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
+ <vscale x 1 x float> %1,
+ <vscale x 1 x half> %0,
+ i32 %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
+ <vscale x 2 x float> %1,
+ <vscale x 2 x half> %0,
+ i32 %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
+ <vscale x 4 x float> %1,
+ <vscale x 4 x half> %0,
+ i32 %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
+ <vscale x 8 x float> %1,
+ <vscale x 8 x half> %0,
+ i32 %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
+ <vscale x 1 x double> %1,
+ <vscale x 1 x float> %0,
+ i32 %2)
+
+ ret <vscale x 1 x double> %a
+}
+
+define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
+ <vscale x 2 x double> %1,
+ <vscale x 2 x float> %0,
+ i32 %2)
+
+ ret <vscale x 2 x double> %a
+}
+
+define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
+ <vscale x 4 x double> %1,
+ <vscale x 4 x float> %0,
+ i32 %2)
+
+ ret <vscale x 4 x double> %a
+}
+
+define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
+ <vscale x 8 x double> %1,
+ <vscale x 8 x float> %0,
+ i32 %2)
+
+ ret <vscale x 8 x double> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
index f3782cad04dc..eb4c0ef7adac 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vfwsub.w-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16(<vsca
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f32_nxv1f32_nxv1f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
@@ -55,8 +54,7 @@ define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16(<vsca
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f32_nxv2f32_nxv2f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
@@ -100,8 +98,7 @@ define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16(<vsca
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f32_nxv4f32_nxv4f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vfwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
@@ -145,8 +142,7 @@ define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16(<vsca
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f32_nxv8f32_nxv8f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vfwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
@@ -190,8 +186,7 @@ define <vscale x 16 x float> @intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16(<
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv16f32_nxv16f32_nxv16f16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vfwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x float> @llvm.riscv.vfwsub.w.nxv16f32.nxv16f16(
@@ -236,8 +231,7 @@ define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32(<vsc
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv1f64_nxv1f64_nxv1f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vfwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vfwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
@@ -281,8 +275,7 @@ define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32(<vsc
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv2f64_nxv2f64_nxv2f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vfwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vfwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
@@ -326,8 +319,7 @@ define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32(<vsc
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv4f64_nxv4f64_nxv4f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vfwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vfwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
@@ -371,8 +363,7 @@ define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32(<vsc
; CHECK-LABEL: intrinsic_vfwsub.w_wv_nxv8f64_nxv8f64_nxv8f32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vfwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vfwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
@@ -1136,3 +1127,131 @@ entry:
ret <vscale x 8 x double> %a
}
+
+define <vscale x 1 x float> @intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16(<vscale x 1 x half> %0, <vscale x 1 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f32_nxv1f32_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfwsub.w.nxv1f32.nxv1f16(
+ <vscale x 1 x float> %1,
+ <vscale x 1 x half> %0,
+ i64 %2)
+
+ ret <vscale x 1 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16(<vscale x 2 x half> %0, <vscale x 2 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f32_nxv2f32_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfwsub.w.nxv2f32.nxv2f16(
+ <vscale x 2 x float> %1,
+ <vscale x 2 x half> %0,
+ i64 %2)
+
+ ret <vscale x 2 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16(<vscale x 4 x half> %0, <vscale x 4 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f32_nxv4f32_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfwsub.w.nxv4f32.nxv4f16(
+ <vscale x 4 x float> %1,
+ <vscale x 4 x half> %0,
+ i64 %2)
+
+ ret <vscale x 4 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16(<vscale x 8 x half> %0, <vscale x 8 x float> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f32_nxv8f32_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfwsub.w.nxv8f32.nxv8f16(
+ <vscale x 8 x float> %1,
+ <vscale x 8 x half> %0,
+ i64 %2)
+
+ ret <vscale x 8 x float> %a
+}
+
+define <vscale x 1 x double> @intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32(<vscale x 1 x float> %0, <vscale x 1 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv1f64_nxv1f64_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfwsub.w.nxv1f64.nxv1f32(
+ <vscale x 1 x double> %1,
+ <vscale x 1 x float> %0,
+ i64 %2)
+
+ ret <vscale x 1 x double> %a
+}
+
+define <vscale x 2 x double> @intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32(<vscale x 2 x float> %0, <vscale x 2 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv2f64_nxv2f64_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfwsub.w.nxv2f64.nxv2f32(
+ <vscale x 2 x double> %1,
+ <vscale x 2 x float> %0,
+ i64 %2)
+
+ ret <vscale x 2 x double> %a
+}
+
+define <vscale x 4 x double> @intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32(<vscale x 4 x float> %0, <vscale x 4 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv4f64_nxv4f64_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfwsub.w.nxv4f64.nxv4f32(
+ <vscale x 4 x double> %1,
+ <vscale x 4 x float> %0,
+ i64 %2)
+
+ ret <vscale x 4 x double> %a
+}
+
+define <vscale x 8 x double> @intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32(<vscale x 8 x float> %0, <vscale x 8 x double> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vfwsub.w_wv_untie_nxv8f64_nxv8f64_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfwsub.w.nxv8f64.nxv8f32(
+ <vscale x 8 x double> %1,
+ <vscale x 8 x float> %0,
+ i64 %2)
+
+ ret <vscale x 8 x double> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll
index c56f4f4592a7..b0f48cb1bede 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale x
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale x
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8(
@@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale x
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
@@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsca
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
@@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsca
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
@@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16(
@@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(
@@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
@@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
@@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16(<vsc
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwadd.w.nxv16i32.nxv16i16(
@@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32(
@@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
@@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
@@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
@@ -1848,3 +1833,227 @@ entry:
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll
index fc03c38e19df..ec0bb0e7086d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwadd.w-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale x
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale x
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8(
@@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale x
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
@@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsca
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
@@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsca
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
@@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16(
@@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(
@@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
@@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
@@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16(<vsc
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwadd.w.nxv16i32.nxv16i16(
@@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwadd.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwadd.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32(
@@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwadd.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwadd.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
@@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwadd.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwadd.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
@@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale
; CHECK-LABEL: intrinsic_vwadd.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwadd.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwadd.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
@@ -1848,3 +1833,227 @@ entry:
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwadd.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwadd.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwadd.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwadd.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwadd.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwadd.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwadd.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwadd.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwadd.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwadd.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwadd.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwadd.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwadd.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwadd.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwadd.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwadd.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwadd.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwadd.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwadd.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll
index dcbe23843d03..f4054063b5f3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8(
@@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
@@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsc
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
@@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsc
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
@@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16(
@@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16(
@@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
@@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
@@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16(<vs
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.w.nxv16i32.nxv16i16(
@@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32(
@@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
@@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
@@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
@@ -1848,3 +1833,227 @@ entry:
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwaddu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwaddu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll
index e5bb4a3c1b96..2df24fa20d0a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwaddu.w-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8(
@@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
@@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsc
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
@@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsc
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
@@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16(
@@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16(
@@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
@@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
@@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16(<vs
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwaddu.w.nxv16i32.nxv16i16(
@@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwaddu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwaddu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32(
@@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwaddu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwaddu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
@@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwaddu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwaddu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
@@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale
; CHECK-LABEL: intrinsic_vwaddu.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwaddu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwaddu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
@@ -1848,3 +1833,227 @@ entry:
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwaddu.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwaddu.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwaddu.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwaddu.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwaddu.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwaddu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwaddu.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwaddu.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwaddu.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwaddu.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwaddu.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwaddu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwaddu.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwaddu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwaddu.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwaddu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwaddu.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwaddu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwaddu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwaddu.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll
index 8f58e8c0b5d4..f5cef80df657 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale x
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale x
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8(
@@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale x
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
@@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsca
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
@@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsca
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
@@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
@@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16(
@@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
@@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
@@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16(<vsc
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsub.w.nxv16i32.nxv16i16(
@@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32(
@@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
@@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
@@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
@@ -1848,3 +1833,227 @@ entry:
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll
index 76dcdeba0358..535991669649 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsub.w-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale x
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale x
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale x
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8(
@@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale x
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
@@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsca
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
@@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsca
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
@@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
@@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16(
@@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
@@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
@@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16(<vsc
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsub.w.nxv16i32.nxv16i16(
@@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwsub.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsub.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32(
@@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwsub.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsub.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
@@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwsub.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsub.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
@@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale
; CHECK-LABEL: intrinsic_vwsub.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwsub.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsub.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
@@ -1848,3 +1833,227 @@ entry:
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwsub.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwsub.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwsub.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwsub.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwsub.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwsub.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwsub.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwsub.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwsub.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwsub.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwsub.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwsub.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwsub.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwsub.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwsub.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwsub.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsub.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwsub.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwsub.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll
index 79b858d51a26..1b3e96e3196c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv32.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8(
@@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
@@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsc
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
@@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsc
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
@@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16(
@@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16(
@@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
@@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
@@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16(<vs
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.w.nxv16i32.nxv16i16(
@@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32(
@@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
@@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
@@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
@@ -1848,3 +1833,227 @@ entry:
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwsubu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i32 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i32 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwsubu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i32 %2)
+
+ ret <vscale x 8 x i64> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll
index 2152b4d44d82..820542959463 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vwsubu.w-rv64.ll
@@ -10,8 +10,7 @@ define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i16_nxv1i16_nxv1i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
@@ -55,8 +54,7 @@ define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i16_nxv2i16_nxv2i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8(
@@ -100,8 +98,7 @@ define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i16_nxv4i16_nxv4i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8(
@@ -145,8 +142,7 @@ define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i16_nxv8i16_nxv8i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
@@ -190,8 +186,7 @@ define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8(<vsc
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i16_nxv16i16_nxv16i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
@@ -235,8 +230,7 @@ define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8(<vsc
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv32i16_nxv32i16_nxv32i8:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
@@ -281,8 +275,7 @@ define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i32_nxv1i32_nxv1i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16(
@@ -326,8 +319,7 @@ define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i32_nxv2i32_nxv2i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16(
@@ -371,8 +363,7 @@ define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i32_nxv4i32_nxv4i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
@@ -416,8 +407,7 @@ define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i32_nxv8i32_nxv8i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
@@ -461,8 +451,7 @@ define <vscale x 16 x i32> @intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16(<vs
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv16i32_nxv16i32_nxv16i16:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e16,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 16 x i32> @llvm.riscv.vwsubu.w.nxv16i32.nxv16i16(
@@ -507,8 +496,7 @@ define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv1i64_nxv1i64_nxv1i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
-; CHECK-NEXT: vwsubu.wv v25, v8, v9
-; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: vwsubu.wv v8, v8, v9
; CHECK-NEXT: ret
entry:
%a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32(
@@ -552,8 +540,7 @@ define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv2i64_nxv2i64_nxv2i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
-; CHECK-NEXT: vwsubu.wv v26, v8, v10
-; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: vwsubu.wv v8, v8, v10
; CHECK-NEXT: ret
entry:
%a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
@@ -597,8 +584,7 @@ define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv4i64_nxv4i64_nxv4i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
-; CHECK-NEXT: vwsubu.wv v28, v8, v12
-; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: vwsubu.wv v8, v8, v12
; CHECK-NEXT: ret
entry:
%a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
@@ -642,8 +628,7 @@ define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32(<vscale
; CHECK-LABEL: intrinsic_vwsubu.w_wv_nxv8i64_nxv8i64_nxv8i32:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
-; CHECK-NEXT: vwsubu.wv v24, v8, v16
-; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: vwsubu.wv v8, v8, v16
; CHECK-NEXT: ret
entry:
%a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
@@ -1848,3 +1833,227 @@ entry:
ret <vscale x 8 x i64> %a
}
+
+define <vscale x 1 x i16> @intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8(<vscale x 1 x i8> %0, <vscale x 1 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i16_nxv1i16_nxv1i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf8,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i16> @llvm.riscv.vwsubu.w.nxv1i16.nxv1i8(
+ <vscale x 1 x i16> %1,
+ <vscale x 1 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i16> %a
+}
+
+define <vscale x 2 x i16> @intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8(<vscale x 2 x i8> %0, <vscale x 2 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i16_nxv2i16_nxv2i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf4,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i16> @llvm.riscv.vwsubu.w.nxv2i16.nxv2i8(
+ <vscale x 2 x i16> %1,
+ <vscale x 2 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i16> %a
+}
+
+define <vscale x 4 x i16> @intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8(<vscale x 4 x i8> %0, <vscale x 4 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i16_nxv4i16_nxv4i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i16> @llvm.riscv.vwsubu.w.nxv4i16.nxv4i8(
+ <vscale x 4 x i16> %1,
+ <vscale x 4 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i16> %a
+}
+
+define <vscale x 8 x i16> @intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8(<vscale x 8 x i8> %0, <vscale x 8 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i16_nxv8i16_nxv8i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i16> @llvm.riscv.vwsubu.w.nxv8i16.nxv8i8(
+ <vscale x 8 x i16> %1,
+ <vscale x 8 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i16> %a
+}
+
+define <vscale x 16 x i16> @intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8(<vscale x 16 x i8> %0, <vscale x 16 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv16i16_nxv16i16_nxv16i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 16 x i16> @llvm.riscv.vwsubu.w.nxv16i16.nxv16i8(
+ <vscale x 16 x i16> %1,
+ <vscale x 16 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 16 x i16> %a
+}
+
+define <vscale x 32 x i16> @intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8(<vscale x 32 x i8> %0, <vscale x 32 x i16> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv32i16_nxv32i16_nxv32i8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e8,m4,ta,mu
+; CHECK-NEXT: vwsubu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 32 x i16> @llvm.riscv.vwsubu.w.nxv32i16.nxv32i8(
+ <vscale x 32 x i16> %1,
+ <vscale x 32 x i8> %0,
+ i64 %2)
+
+ ret <vscale x 32 x i16> %a
+}
+
+define <vscale x 1 x i32> @intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16(<vscale x 1 x i16> %0, <vscale x 1 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i32_nxv1i32_nxv1i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i32> @llvm.riscv.vwsubu.w.nxv1i32.nxv1i16(
+ <vscale x 1 x i32> %1,
+ <vscale x 1 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i32> %a
+}
+
+define <vscale x 2 x i32> @intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16(<vscale x 2 x i16> %0, <vscale x 2 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i32_nxv2i32_nxv2i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i32> @llvm.riscv.vwsubu.w.nxv2i32.nxv2i16(
+ <vscale x 2 x i32> %1,
+ <vscale x 2 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i32> %a
+}
+
+define <vscale x 4 x i32> @intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16(<vscale x 4 x i16> %0, <vscale x 4 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i32_nxv4i32_nxv4i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i32> @llvm.riscv.vwsubu.w.nxv4i32.nxv4i16(
+ <vscale x 4 x i32> %1,
+ <vscale x 4 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i32> %a
+}
+
+define <vscale x 8 x i32> @intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16(<vscale x 8 x i16> %0, <vscale x 8 x i32> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i32_nxv8i32_nxv8i16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e16,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i32> @llvm.riscv.vwsubu.w.nxv8i32.nxv8i16(
+ <vscale x 8 x i32> %1,
+ <vscale x 8 x i16> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i32> %a
+}
+
+define <vscale x 1 x i64> @intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32(<vscale x 1 x i32> %0, <vscale x 1 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv1i64_nxv1i64_nxv1i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vwsubu.wv v25, v9, v8
+; CHECK-NEXT: vmv1r.v v8, v25
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 1 x i64> @llvm.riscv.vwsubu.w.nxv1i64.nxv1i32(
+ <vscale x 1 x i64> %1,
+ <vscale x 1 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 1 x i64> %a
+}
+
+define <vscale x 2 x i64> @intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32(<vscale x 2 x i32> %0, <vscale x 2 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv2i64_nxv2i64_nxv2i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m1,ta,mu
+; CHECK-NEXT: vwsubu.wv v26, v10, v8
+; CHECK-NEXT: vmv2r.v v8, v26
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 2 x i64> @llvm.riscv.vwsubu.w.nxv2i64.nxv2i32(
+ <vscale x 2 x i64> %1,
+ <vscale x 2 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 2 x i64> %a
+}
+
+define <vscale x 4 x i64> @intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv4i64_nxv4i64_nxv4i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m2,ta,mu
+; CHECK-NEXT: vwsubu.wv v28, v12, v8
+; CHECK-NEXT: vmv4r.v v8, v28
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 4 x i64> @llvm.riscv.vwsubu.w.nxv4i64.nxv4i32(
+ <vscale x 4 x i64> %1,
+ <vscale x 4 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 4 x i64> %a
+}
+
+define <vscale x 8 x i64> @intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32(<vscale x 8 x i32> %0, <vscale x 8 x i64> %1, i64 %2) nounwind {
+; CHECK-LABEL: intrinsic_vwsubu.w_wv_untie_nxv8i64_nxv8i64_nxv8i32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli zero, a0, e32,m4,ta,mu
+; CHECK-NEXT: vwsubu.wv v24, v16, v8
+; CHECK-NEXT: vmv8r.v v8, v24
+; CHECK-NEXT: ret
+entry:
+ %a = call <vscale x 8 x i64> @llvm.riscv.vwsubu.w.nxv8i64.nxv8i32(
+ <vscale x 8 x i64> %1,
+ <vscale x 8 x i32> %0,
+ i64 %2)
+
+ ret <vscale x 8 x i64> %a
+}