[llvm] 24628a1 - [RISCV] Add patterns for vnsr[a,l].wx where shift amount has different type than vector element
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 21 02:13:37 PDT 2023
Author: Luke Lau
Date: 2023-07-21T10:13:28+01:00
New Revision: 24628a14c4fb63eab8d5e68fb5c3cc49d14c0dcf
URL: https://github.com/llvm/llvm-project/commit/24628a14c4fb63eab8d5e68fb5c3cc49d14c0dcf
DIFF: https://github.com/llvm/llvm-project/commit/24628a14c4fb63eab8d5e68fb5c3cc49d14c0dcf.diff
LOG: [RISCV] Add patterns for vnsr[a,l].wx where shift amount has different type than vector element
We're currently only matching scalar shift amounts where the type is the same
as the vector element type. But because only the bottom log2(2*SEW) bits are
used, at most 7 bits will be used, so we can use any scalar type >= i8.
This patch adds patterns for the case above, as well as for when the shift
amount type is the same as the widened element type and doesn't need to be extended.
Reviewed By: craig.topper
Differential Revision: https://reviews.llvm.org/D155698
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
llvm/lib/Target/RISCV/RISCVInstrInfo.td
llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b9d24df3a65629..8204c5f09f3b8f 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -3016,6 +3016,16 @@ bool RISCVDAGToDAGISel::selectVSplatUimm5(SDValue N, SDValue &SplatVal) {
return true;
}
+bool RISCVDAGToDAGISel::selectExtOneUseVSplat(SDValue N, SDValue &SplatVal) {
+ if (N->getOpcode() == ISD::SIGN_EXTEND ||
+ N->getOpcode() == ISD::ZERO_EXTEND) {
+ if (!N.hasOneUse())
+ return false;
+ N = N->getOperand(0);
+ }
+ return selectVSplat(N, SplatVal);
+}
+
bool RISCVDAGToDAGISel::selectFPImm(SDValue N, SDValue &Imm) {
ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N.getNode());
if (!CFP)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
index c116e3e75db937..e99f8d69ebe12c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.h
@@ -131,6 +131,7 @@ class RISCVDAGToDAGISel : public SelectionDAGISel {
bool selectVSplatUimm5(SDValue N, SDValue &SplatVal);
bool selectVSplatSimm5Plus1(SDValue N, SDValue &SplatVal);
bool selectVSplatSimm5Plus1NonZero(SDValue N, SDValue &SplatVal);
+ bool selectExtOneUseVSplat(SDValue N, SDValue &SplatVal);
bool selectFPImm(SDValue N, SDValue &Imm);
bool selectRVVSimm5(SDValue N, unsigned Width, SDValue &Imm);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 72d4cbce67b476..e58e3412aea350 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1234,6 +1234,8 @@ def zexti32 : ComplexPattern<i64, 1, "selectZExtBits<32>">;
def zexti16 : ComplexPattern<XLenVT, 1, "selectZExtBits<16>">;
def zexti8 : ComplexPattern<XLenVT, 1, "selectZExtBits<8>">;
+def ext : PatFrags<(ops node:$A), [(sext node:$A), (zext node:$A)]>;
+
class binop_oneuse<SDPatternOperator operator>
: PatFrag<(ops node:$A, node:$B),
(operator node:$A, node:$B), [{
@@ -1259,6 +1261,7 @@ class unop_oneuse<SDPatternOperator operator>
def sext_oneuse : unop_oneuse<sext>;
def zext_oneuse : unop_oneuse<zext>;
def anyext_oneuse : unop_oneuse<anyext>;
+def ext_oneuse : unop_oneuse<ext>;
def fpext_oneuse : unop_oneuse<any_fpextend>;
/// Simple arithmetic operations
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
index b433f3fb63f94c..8cef4140067904 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td
@@ -569,12 +569,15 @@ foreach kind = ["ADD", "UMAX", "SMAX", "UMIN", "SMIN", "AND", "OR", "XOR",
// Give explicit Complexity to prefer simm5/uimm5.
def SplatPat : ComplexPattern<vAny, 1, "selectVSplat", [], [], 1>;
-def SplatPat_simm5 : ComplexPattern<vAny, 1, "selectVSplatSimm5", [], [], 2>;
-def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimm5", [], [], 2>;
+def SplatPat_simm5 : ComplexPattern<vAny, 1, "selectVSplatSimm5", [], [], 3>;
+def SplatPat_uimm5 : ComplexPattern<vAny, 1, "selectVSplatUimm5", [], [], 3>;
def SplatPat_simm5_plus1
- : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1", [], [], 2>;
+ : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1", [], [], 3>;
def SplatPat_simm5_plus1_nonzero
- : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 2>;
+ : ComplexPattern<vAny, 1, "selectVSplatSimm5Plus1NonZero", [], [], 3>;
+
+def ext_oneuse_SplatPat
+ : ComplexPattern<vAny, 1, "selectExtOneUseVSplat", [], [], 2>;
def SelectFPImm : ComplexPattern<fAny, 1, "selectFPImm", [], [], 1>;
@@ -1428,7 +1431,7 @@ multiclass VPatReductionVL_RM<SDNode vop, string instruction_name, bit is_float>
}
}
-multiclass VPatBinaryExtVL_WV_WX<SDNode op, PatFrags extop, string instruction_name> {
+multiclass VPatBinaryExtVL_WV_WX<SDNode op, string instruction_name> {
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
@@ -1438,17 +1441,18 @@ multiclass VPatBinaryExtVL_WV_WX<SDNode op, PatFrags extop, string instruction_n
(vti.Vector
(riscv_trunc_vector_vl
(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (extop (vti.Vector vti.RegClass:$rs1)))),
+ (wti.Vector (ext_oneuse (vti.Vector vti.RegClass:$rs1)))),
(vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WV_"#vti.LMul.MX)
(vti.Vector (IMPLICIT_DEF)),
wti.RegClass:$rs2, vti.RegClass:$rs1, GPR:$vl, vti.Log2SEW, TU_MU)>;
+
def : Pat<
(vti.Vector
(riscv_trunc_vector_vl
(op (wti.Vector wti.RegClass:$rs2),
- (wti.Vector (extop (vti.Vector (SplatPat (XLenVT GPR:$rs1)))))),
+ (wti.Vector (ext_oneuse_SplatPat (XLenVT GPR:$rs1)))),
(vti.Mask true_mask),
VLOpFrag)),
(!cast<Instruction>(instruction_name#"_WX_"#vti.LMul.MX)
@@ -1459,8 +1463,7 @@ multiclass VPatBinaryExtVL_WV_WX<SDNode op, PatFrags extop, string instruction_n
}
multiclass VPatBinaryVL_WV_WX_WI<SDNode op, string instruction_name>
- : VPatBinaryExtVL_WV_WX<op, sext_oneuse, instruction_name>,
- VPatBinaryExtVL_WV_WX<op, zext_oneuse, instruction_name> {
+ : VPatBinaryExtVL_WV_WX<op, instruction_name> {
foreach vtiToWti = AllWidenableIntVectors in {
defvar vti = vtiToWti.Vti;
defvar wti = vtiToWti.Wti;
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index 8b8e10b0c514ca..549a127c31fac5 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1056,10 +1056,7 @@ define <vscale x 1 x i32> @ctlz_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v9, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v9, a0
; CHECK-D-NEXT: li a0, 1054
; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 32
@@ -1167,12 +1164,9 @@ define <vscale x 2 x i32> @ctlz_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v10, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v10, a0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v8, v10, a0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 32
; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
@@ -1278,12 +1272,9 @@ define <vscale x 4 x i32> @ctlz_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v12, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v12, a0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v8, v12, a0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 32
; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
@@ -1389,12 +1380,9 @@ define <vscale x 8 x i32> @ctlz_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v16, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v16, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v16, a0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v8, v16, a0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: li a0, 32
; CHECK-D-NEXT: vminu.vx v8, v8, a0
; CHECK-D-NEXT: ret
@@ -3099,10 +3087,7 @@ define <vscale x 1 x i32> @ctlz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, mf2, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v9, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v9, a0
; CHECK-D-NEXT: li a0, 1054
; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
@@ -3205,12 +3190,9 @@ define <vscale x 2 x i32> @ctlz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, m1, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v10, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v10, a0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v8, v10, a0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv2i32:
@@ -3311,12 +3293,9 @@ define <vscale x 4 x i32> @ctlz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, m2, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v12, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v12, a0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v8, v12, a0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv4i32:
@@ -3417,12 +3396,9 @@ define <vscale x 8 x i32> @ctlz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-D-NEXT: vsetvli a0, zero, e32, m4, ta, ma
; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v16, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v16, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v16, a0
; CHECK-D-NEXT: li a0, 1054
-; CHECK-D-NEXT: vrsub.vx v8, v16, a0
+; CHECK-D-NEXT: vrsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv8i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index 68134bc0e4e4c9..03eaf5e60c7b62 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1026,10 +1026,7 @@ define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-D-NEXT: vsrl.vx v9, v10, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v9, v9, 0
+; CHECK-D-NEXT: vnsrl.wx v9, v10, a0
; CHECK-D-NEXT: li a0, 1023
; CHECK-D-NEXT: vsub.vx v9, v9, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
@@ -1129,10 +1126,7 @@ define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-D-NEXT: vand.vv v9, v8, v9
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v9
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-D-NEXT: vsrl.vx v10, v10, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v9, v10, 0
+; CHECK-D-NEXT: vnsrl.wx v9, v10, a0
; CHECK-D-NEXT: li a0, 1023
; CHECK-D-NEXT: vsub.vx v9, v9, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
@@ -1232,10 +1226,7 @@ define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-D-NEXT: vand.vv v10, v8, v10
; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v10
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-D-NEXT: vsrl.vx v12, v12, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v10, v12, 0
+; CHECK-D-NEXT: vnsrl.wx v10, v12, a0
; CHECK-D-NEXT: li a0, 1023
; CHECK-D-NEXT: vsub.vx v10, v10, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
@@ -1335,10 +1326,7 @@ define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-D-NEXT: vand.vv v12, v8, v12
; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v12
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-D-NEXT: vsrl.vx v16, v16, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v12, v16, 0
+; CHECK-D-NEXT: vnsrl.wx v12, v16, a0
; CHECK-D-NEXT: li a0, 1023
; CHECK-D-NEXT: vsub.vx v12, v12, a0
; CHECK-D-NEXT: vmseq.vi v0, v8, 0
@@ -3089,10 +3077,7 @@ define <vscale x 1 x i32> @cttz_zero_undef_nxv1i32(<vscale x 1 x i32> %va) {
; CHECK-D-NEXT: vand.vv v8, v8, v9
; CHECK-D-NEXT: vfwcvt.f.xu.v v9, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v9, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v9, a0
; CHECK-D-NEXT: li a0, 1023
; CHECK-D-NEXT: vsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
@@ -3185,12 +3170,9 @@ define <vscale x 2 x i32> @cttz_zero_undef_nxv2i32(<vscale x 2 x i32> %va) {
; CHECK-D-NEXT: vand.vv v8, v8, v9
; CHECK-D-NEXT: vfwcvt.f.xu.v v10, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v10, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v10, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v10, a0
; CHECK-D-NEXT: li a0, 1023
-; CHECK-D-NEXT: vsub.vx v8, v10, a0
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv2i32:
@@ -3281,12 +3263,9 @@ define <vscale x 4 x i32> @cttz_zero_undef_nxv4i32(<vscale x 4 x i32> %va) {
; CHECK-D-NEXT: vand.vv v8, v8, v10
; CHECK-D-NEXT: vfwcvt.f.xu.v v12, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v12, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v12, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v12, a0
; CHECK-D-NEXT: li a0, 1023
-; CHECK-D-NEXT: vsub.vx v8, v12, a0
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv4i32:
@@ -3377,12 +3356,9 @@ define <vscale x 8 x i32> @cttz_zero_undef_nxv8i32(<vscale x 8 x i32> %va) {
; CHECK-D-NEXT: vand.vv v8, v8, v12
; CHECK-D-NEXT: vfwcvt.f.xu.v v16, v8
; CHECK-D-NEXT: li a0, 52
-; CHECK-D-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-D-NEXT: vsrl.vx v8, v16, a0
-; CHECK-D-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-D-NEXT: vnsrl.wi v16, v8, 0
+; CHECK-D-NEXT: vnsrl.wx v8, v16, a0
; CHECK-D-NEXT: li a0, 1023
-; CHECK-D-NEXT: vsub.vx v8, v16, a0
+; CHECK-D-NEXT: vsub.vx v8, v8, a0
; CHECK-D-NEXT: ret
;
; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv8i32:
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
index aafcf19dcadfae..13b7a8e1991acc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsra-sdnode.ll
@@ -7,10 +7,8 @@
define <vscale x 1 x i32> @vnsra_wx_i64_nxv1i32(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vnsra_wx_i64_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsra.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsra.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
@@ -22,10 +20,9 @@ define <vscale x 1 x i32> @vnsra_wx_i64_nxv1i32(<vscale x 1 x i64> %va, i64 %b)
define <vscale x 2 x i32> @vnsra_wx_i64_nxv2i32(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vnsra_wx_i64_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsra.vx v10, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsra.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -37,10 +34,9 @@ define <vscale x 2 x i32> @vnsra_wx_i64_nxv2i32(<vscale x 2 x i64> %va, i64 %b)
define <vscale x 4 x i32> @vnsra_wx_i64_nxv4i32(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vnsra_wx_i64_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsra.vx v12, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsra.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -52,10 +48,9 @@ define <vscale x 4 x i32> @vnsra_wx_i64_nxv4i32(<vscale x 4 x i64> %va, i64 %b)
define <vscale x 8 x i32> @vnsra_wx_i64_nxv8i32(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vnsra_wx_i64_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsra.vx v16, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsra.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -93,13 +88,8 @@ define <vscale x 1 x i32> @vnsra_wx_i32_nxv1i32_sext(<vscale x 1 x i64> %va, i32
define <vscale x 1 x i32> @vnsra_wx_i16_nxv1i32_sext(<vscale x 1 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsra_wx_i16_nxv1i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v9
-; CHECK-NEXT: vsra.vv v8, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsra.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -112,13 +102,8 @@ define <vscale x 1 x i32> @vnsra_wx_i16_nxv1i32_sext(<vscale x 1 x i64> %va, i16
define <vscale x 1 x i32> @vnsra_wx_i8_nxv1i32_sext(<vscale x 1 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsra_wx_i8_nxv1i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsext.vf8 v10, v9
-; CHECK-NEXT: vsra.vv v8, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsra.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -173,13 +158,9 @@ define <vscale x 2 x i32> @vnsra_wx_i32_nxv2i32_sext(<vscale x 2 x i64> %va, i32
define <vscale x 2 x i32> @vnsra_wx_i16_nxv2i32_sext(<vscale x 2 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsra_wx_i16_nxv2i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v12, v10
-; CHECK-NEXT: vsra.vv v10, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsra.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -192,13 +173,9 @@ define <vscale x 2 x i32> @vnsra_wx_i16_nxv2i32_sext(<vscale x 2 x i64> %va, i16
define <vscale x 2 x i32> @vnsra_wx_i8_nxv2i32_sext(<vscale x 2 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsra_wx_i8_nxv2i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf8 v12, v10
-; CHECK-NEXT: vsra.vv v10, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsra.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -254,13 +231,9 @@ define <vscale x 4 x i32> @vnsra_wx_i32_nxv4i32_sext(<vscale x 4 x i64> %va, i32
define <vscale x 4 x i32> @vnsra_wx_i16_nxv4i32_sext(<vscale x 4 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsra_wx_i16_nxv4i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf4 v16, v12
-; CHECK-NEXT: vsra.vv v12, v8, v16
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsra.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -273,13 +246,9 @@ define <vscale x 4 x i32> @vnsra_wx_i16_nxv4i32_sext(<vscale x 4 x i64> %va, i16
define <vscale x 4 x i32> @vnsra_wx_i8_nxv4i32_sext(<vscale x 4 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsra_wx_i8_nxv4i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf8 v16, v12
-; CHECK-NEXT: vsra.vv v12, v8, v16
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsra.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -335,13 +304,9 @@ define <vscale x 8 x i32> @vnsra_wx_i32_nxv8i32_sext(<vscale x 8 x i64> %va, i32
define <vscale x 8 x i32> @vnsra_wx_i16_nxv8i32_sext(<vscale x 8 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsra_wx_i16_nxv8i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf4 v24, v16
-; CHECK-NEXT: vsra.vv v16, v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsra.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -354,13 +319,9 @@ define <vscale x 8 x i32> @vnsra_wx_i16_nxv8i32_sext(<vscale x 8 x i64> %va, i16
define <vscale x 8 x i32> @vnsra_wx_i8_nxv8i32_sext(<vscale x 8 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsra_wx_i8_nxv8i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf8 v24, v16
-; CHECK-NEXT: vsra.vv v16, v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsra.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -414,13 +375,8 @@ define <vscale x 1 x i32> @vnsra_wx_i32_nxv1i32_zext(<vscale x 1 x i64> %va, i32
define <vscale x 1 x i32> @vnsra_wx_i16_nxv1i32_zext(<vscale x 1 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsra_wx_i16_nxv1i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v9
-; CHECK-NEXT: vsra.vv v8, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsra.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -433,13 +389,8 @@ define <vscale x 1 x i32> @vnsra_wx_i16_nxv1i32_zext(<vscale x 1 x i64> %va, i16
define <vscale x 1 x i32> @vnsra_wx_i8_nxv1i32_zext(<vscale x 1 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsra_wx_i8_nxv1i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vzext.vf8 v10, v9
-; CHECK-NEXT: vsra.vv v8, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsra.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -494,13 +445,9 @@ define <vscale x 2 x i32> @vnsra_wx_i32_nxv2i32_zext(<vscale x 2 x i64> %va, i32
define <vscale x 2 x i32> @vnsra_wx_i16_nxv2i32_zext(<vscale x 2 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsra_wx_i16_nxv2i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v10
-; CHECK-NEXT: vsra.vv v10, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsra.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -513,13 +460,9 @@ define <vscale x 2 x i32> @vnsra_wx_i16_nxv2i32_zext(<vscale x 2 x i64> %va, i16
define <vscale x 2 x i32> @vnsra_wx_i8_nxv2i32_zext(<vscale x 2 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsra_wx_i8_nxv2i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf8 v12, v10
-; CHECK-NEXT: vsra.vv v10, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsra.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -575,13 +518,9 @@ define <vscale x 4 x i32> @vnsra_wx_i32_nxv4i32_zext(<vscale x 4 x i64> %va, i32
define <vscale x 4 x i32> @vnsra_wx_i16_nxv4i32_zext(<vscale x 4 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsra_wx_i16_nxv4i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v12
-; CHECK-NEXT: vsra.vv v12, v8, v16
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsra.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -594,13 +533,9 @@ define <vscale x 4 x i32> @vnsra_wx_i16_nxv4i32_zext(<vscale x 4 x i64> %va, i16
define <vscale x 4 x i32> @vnsra_wx_i8_nxv4i32_zext(<vscale x 4 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsra_wx_i8_nxv4i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf8 v16, v12
-; CHECK-NEXT: vsra.vv v12, v8, v16
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsra.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -656,13 +591,9 @@ define <vscale x 8 x i32> @vnsra_wx_i32_nxv8i32_zext(<vscale x 8 x i64> %va, i32
define <vscale x 8 x i32> @vnsra_wx_i16_nxv8i32_zext(<vscale x 8 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsra_wx_i16_nxv8i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf4 v24, v16
-; CHECK-NEXT: vsra.vv v16, v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsra.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -675,13 +606,9 @@ define <vscale x 8 x i32> @vnsra_wx_i16_nxv8i32_zext(<vscale x 8 x i64> %va, i16
define <vscale x 8 x i32> @vnsra_wx_i8_nxv8i32_zext(<vscale x 8 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsra_wx_i8_nxv8i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf8 v24, v16
-; CHECK-NEXT: vsra.vv v16, v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsra.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
index 1c81292caf7f5b..30eaabfb6c5a81 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vnsrl-sdnode.ll
@@ -7,10 +7,8 @@
define <vscale x 1 x i32> @vnsrl_wx_i64_nxv1i32(<vscale x 1 x i64> %va, i64 %b) {
; CHECK-LABEL: vnsrl_wx_i64_nxv1i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsrl.vx v8, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 1 x i64> %head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
@@ -22,10 +20,9 @@ define <vscale x 1 x i32> @vnsrl_wx_i64_nxv1i32(<vscale x 1 x i64> %va, i64 %b)
define <vscale x 2 x i32> @vnsrl_wx_i64_nxv2i32(<vscale x 2 x i64> %va, i64 %b) {
; CHECK-LABEL: vnsrl_wx_i64_nxv2i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsrl.vx v10, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 2 x i64> %head, <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
@@ -37,10 +34,9 @@ define <vscale x 2 x i32> @vnsrl_wx_i64_nxv2i32(<vscale x 2 x i64> %va, i64 %b)
define <vscale x 4 x i32> @vnsrl_wx_i64_nxv4i32(<vscale x 4 x i64> %va, i64 %b) {
; CHECK-LABEL: vnsrl_wx_i64_nxv4i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsrl.vx v12, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 4 x i64> %head, <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
@@ -52,10 +48,9 @@ define <vscale x 4 x i32> @vnsrl_wx_i64_nxv4i32(<vscale x 4 x i64> %va, i64 %b)
define <vscale x 8 x i32> @vnsrl_wx_i64_nxv8i32(<vscale x 8 x i64> %va, i64 %b) {
; CHECK-LABEL: vnsrl_wx_i64_nxv8i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsrl.vx v16, v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i64> poison, i64 %b, i32 0
%splat = shufflevector <vscale x 8 x i64> %head, <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
@@ -93,13 +88,8 @@ define <vscale x 1 x i32> @vnsrl_wx_i32_nxv1i32_sext(<vscale x 1 x i64> %va, i32
define <vscale x 1 x i32> @vnsrl_wx_i16_nxv1i32_sext(<vscale x 1 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsrl_wx_i16_nxv1i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsext.vf4 v10, v9
-; CHECK-NEXT: vsrl.vv v8, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -112,13 +102,8 @@ define <vscale x 1 x i32> @vnsrl_wx_i16_nxv1i32_sext(<vscale x 1 x i64> %va, i16
define <vscale x 1 x i32> @vnsrl_wx_i8_nxv1i32_sext(<vscale x 1 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsrl_wx_i8_nxv1i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsext.vf8 v10, v9
-; CHECK-NEXT: vsrl.vv v8, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -173,13 +158,9 @@ define <vscale x 2 x i32> @vnsrl_wx_i32_nxv2i32_sext(<vscale x 2 x i64> %va, i32
define <vscale x 2 x i32> @vnsrl_wx_i16_nxv2i32_sext(<vscale x 2 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsrl_wx_i16_nxv2i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf4 v12, v10
-; CHECK-NEXT: vsrl.vv v10, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -192,13 +173,9 @@ define <vscale x 2 x i32> @vnsrl_wx_i16_nxv2i32_sext(<vscale x 2 x i64> %va, i16
define <vscale x 2 x i32> @vnsrl_wx_i8_nxv2i32_sext(<vscale x 2 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsrl_wx_i8_nxv2i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf8 v12, v10
-; CHECK-NEXT: vsrl.vv v10, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -254,13 +231,9 @@ define <vscale x 4 x i32> @vnsrl_wx_i32_nxv4i32_sext(<vscale x 4 x i64> %va, i32
define <vscale x 4 x i32> @vnsrl_wx_i16_nxv4i32_sext(<vscale x 4 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsrl_wx_i16_nxv4i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf4 v16, v12
-; CHECK-NEXT: vsrl.vv v12, v8, v16
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -273,13 +246,9 @@ define <vscale x 4 x i32> @vnsrl_wx_i16_nxv4i32_sext(<vscale x 4 x i64> %va, i16
define <vscale x 4 x i32> @vnsrl_wx_i8_nxv4i32_sext(<vscale x 4 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsrl_wx_i8_nxv4i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf8 v16, v12
-; CHECK-NEXT: vsrl.vv v12, v8, v16
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -335,13 +304,9 @@ define <vscale x 8 x i32> @vnsrl_wx_i32_nxv8i32_sext(<vscale x 8 x i64> %va, i32
define <vscale x 8 x i32> @vnsrl_wx_i16_nxv8i32_sext(<vscale x 8 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsrl_wx_i16_nxv8i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf4 v24, v16
-; CHECK-NEXT: vsrl.vv v16, v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -354,13 +319,9 @@ define <vscale x 8 x i32> @vnsrl_wx_i16_nxv8i32_sext(<vscale x 8 x i64> %va, i16
define <vscale x 8 x i32> @vnsrl_wx_i8_nxv8i32_sext(<vscale x 8 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsrl_wx_i8_nxv8i32_sext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf8 v24, v16
-; CHECK-NEXT: vsrl.vv v16, v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
@@ -414,13 +375,8 @@ define <vscale x 1 x i32> @vnsrl_wx_i32_nxv1i32_zext(<vscale x 1 x i64> %va, i32
define <vscale x 1 x i32> @vnsrl_wx_i16_nxv1i32_zext(<vscale x 1 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsrl_wx_i16_nxv1i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vzext.vf4 v10, v9
-; CHECK-NEXT: vsrl.vv v8, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 1 x i16> %head, <vscale x 1 x i16> poison, <vscale x 1 x i32> zeroinitializer
@@ -433,13 +389,8 @@ define <vscale x 1 x i32> @vnsrl_wx_i16_nxv1i32_zext(<vscale x 1 x i64> %va, i16
define <vscale x 1 x i32> @vnsrl_wx_i8_nxv1i32_zext(<vscale x 1 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsrl_wx_i8_nxv1i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma
-; CHECK-NEXT: vsext.vf8 v10, v9
-; CHECK-NEXT: vsrl.vv v8, v8, v10
-; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v8, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vnsrl.wx v8, v8, a0
; CHECK-NEXT: ret
%head = insertelement <vscale x 1 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 1 x i8> %head, <vscale x 1 x i8> poison, <vscale x 1 x i32> zeroinitializer
@@ -494,13 +445,9 @@ define <vscale x 2 x i32> @vnsrl_wx_i32_nxv2i32_zext(<vscale x 2 x i64> %va, i32
define <vscale x 2 x i32> @vnsrl_wx_i16_nxv2i32_zext(<vscale x 2 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsrl_wx_i16_nxv2i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vzext.vf4 v12, v10
-; CHECK-NEXT: vsrl.vv v10, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 2 x i16> %head, <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
@@ -513,13 +460,9 @@ define <vscale x 2 x i32> @vnsrl_wx_i16_nxv2i32_zext(<vscale x 2 x i64> %va, i16
define <vscale x 2 x i32> @vnsrl_wx_i8_nxv2i32_zext(<vscale x 2 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsrl_wx_i8_nxv2i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma
-; CHECK-NEXT: vmv.v.x v10, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma
-; CHECK-NEXT: vsext.vf8 v12, v10
-; CHECK-NEXT: vsrl.vv v10, v8, v12
-; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v10, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma
+; CHECK-NEXT: vnsrl.wx v10, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v10
; CHECK-NEXT: ret
%head = insertelement <vscale x 2 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 2 x i8> %head, <vscale x 2 x i8> poison, <vscale x 2 x i32> zeroinitializer
@@ -575,13 +518,9 @@ define <vscale x 4 x i32> @vnsrl_wx_i32_nxv4i32_zext(<vscale x 4 x i64> %va, i32
define <vscale x 4 x i32> @vnsrl_wx_i16_nxv4i32_zext(<vscale x 4 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsrl_wx_i16_nxv4i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vzext.vf4 v16, v12
-; CHECK-NEXT: vsrl.vv v12, v8, v16
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 4 x i16> %head, <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
@@ -594,13 +533,9 @@ define <vscale x 4 x i32> @vnsrl_wx_i16_nxv4i32_zext(<vscale x 4 x i64> %va, i16
define <vscale x 4 x i32> @vnsrl_wx_i8_nxv4i32_zext(<vscale x 4 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsrl_wx_i8_nxv4i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vmv.v.x v12, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
-; CHECK-NEXT: vsext.vf8 v16, v12
-; CHECK-NEXT: vsrl.vv v12, v8, v16
-; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v12, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m2, ta, ma
+; CHECK-NEXT: vnsrl.wx v12, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%head = insertelement <vscale x 4 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 4 x i8> %head, <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
@@ -656,13 +591,9 @@ define <vscale x 8 x i32> @vnsrl_wx_i32_nxv8i32_zext(<vscale x 8 x i64> %va, i32
define <vscale x 8 x i32> @vnsrl_wx_i16_nxv8i32_zext(<vscale x 8 x i64> %va, i16 %b) {
; CHECK-LABEL: vnsrl_wx_i16_nxv8i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vzext.vf4 v24, v16
-; CHECK-NEXT: vsrl.vv v16, v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i16> poison, i16 %b, i32 0
%splat = shufflevector <vscale x 8 x i16> %head, <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
@@ -675,13 +606,9 @@ define <vscale x 8 x i32> @vnsrl_wx_i16_nxv8i32_zext(<vscale x 8 x i64> %va, i16
define <vscale x 8 x i32> @vnsrl_wx_i8_nxv8i32_zext(<vscale x 8 x i64> %va, i8 %b) {
; CHECK-LABEL: vnsrl_wx_i8_nxv8i32_zext:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsetvli a1, zero, e8, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v16, a0
-; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma
-; CHECK-NEXT: vsext.vf8 v24, v16
-; CHECK-NEXT: vsrl.vv v16, v8, v24
-; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
-; CHECK-NEXT: vnsrl.wi v8, v16, 0
+; CHECK-NEXT: vsetvli a1, zero, e32, m4, ta, ma
+; CHECK-NEXT: vnsrl.wx v16, v8, a0
+; CHECK-NEXT: vmv.v.v v8, v16
; CHECK-NEXT: ret
%head = insertelement <vscale x 8 x i8> poison, i8 %b, i32 0
%splat = shufflevector <vscale x 8 x i8> %head, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
More information about the llvm-commits mailing list