[llvm] 28cb508 - [TargetLowering][RISCV] Allow truncation when checking if the arguments of a setcc are splats.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Apr 11 09:50:28 PDT 2022


Author: Craig Topper
Date: 2022-04-11T09:49:36-07:00
New Revision: 28cb508195be1b005a7e1cb53a75bff0dd4c3bcb

URL: https://github.com/llvm/llvm-project/commit/28cb508195be1b005a7e1cb53a75bff0dd4c3bcb
DIFF: https://github.com/llvm/llvm-project/commit/28cb508195be1b005a7e1cb53a75bff0dd4c3bcb.diff

LOG: [TargetLowering][RISCV] Allow truncation when checking if the arguments of a setcc are splats.

We're just trying to canonicalize here and won't be using the constant
value returned.

The attached test changes are because we were previously commuting
a (seteq X, (splat_vector 0)) since a (sub 0, X) also exists. The
constant 0 is wider than the element type, so we don't detect it as a
splat without the AllowTruncation flag. By preventing the commute we
are able to match the compare to the vmseq.vi instruction during
isel; isel only looks for constants on the RHS.
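
For context, a minimal LLVM IR sketch of the node shape involved
(illustrative only, not part of this commit; the function name is made
up and the trailing select merely keeps both values live rather than
reproducing the full cttz expansion shown in the test below):

    define <vscale x 1 x i8> @node_shape_sketch(<vscale x 1 x i8> %x) {
      ; The cttz expansion materializes (x & -x), so a (sub 0, x) node exists...
      %neg    = sub <vscale x 1 x i8> zeroinitializer, %x
      %blsi   = and <vscale x 1 x i8> %x, %neg
      ; ...alongside a compare of x against zero. In the SelectionDAG this is
      ; (seteq X, (splat_vector 0)), where the splat's scalar operand is wider
      ; than i8, so without AllowTruncation it was not recognized as a constant
      ; splat and the setcc operands were commuted.
      %iszero = icmp eq <vscale x 1 x i8> %x, zeroinitializer
      %res    = select <vscale x 1 x i1> %iszero, <vscale x 1 x i8> %neg, <vscale x 1 x i8> %blsi
      ret <vscale x 1 x i8> %res
    }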

Reviewed By: spatel

Differential Revision: https://reviews.llvm.org/D123256

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index d38a5a154d688..65a5eba422de3 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3955,13 +3955,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
     return Fold;
 
+  bool N0ConstOrSplat =
+      isConstOrConstSplat(N0, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+  bool N1ConstOrSplat =
+      isConstOrConstSplat(N1, /*AllowUndefs*/ false, /*AllowTruncate*/ true);
+
   // Ensure that the constant occurs on the RHS and fold constant comparisons.
   // TODO: Handle non-splat vector constants. All undef causes trouble.
   // FIXME: We can't yet fold constant scalable vector splats, so avoid an
   // infinite loop here when we encounter one.
   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
-  if (isConstOrConstSplat(N0) &&
-      (!OpVT.isScalableVector() || !isConstOrConstSplat(N1)) &&
+  if (N0ConstOrSplat && (!OpVT.isScalableVector() || !N1ConstOrSplat) &&
       (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -3970,7 +3974,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   // -- but in reverse order -- then try to commute the operands of this setcc
   // to match. A matching pair of setcc (cmp) and sub may be combined into 1
   // instruction on some targets.
-  if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+  if (!N0ConstOrSplat && !N1ConstOrSplat &&
       (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
       DAG.doesNodeExist(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&

diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index df1fe1a28b381..9f5a503f56d94 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -50,36 +50,36 @@ define <vscale x 1 x i8> @cttz_nxv1i8(<vscale x 1 x i8> %va) {
 ; RV32D-LABEL: cttz_nxv1i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
+; RV32D-NEXT:    vand.vv v9, v8, v9
 ; RV32D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
+; RV32D-NEXT:    vzext.vf4 v10, v9
+; RV32D-NEXT:    vfcvt.f.xu.v v9, v10
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 23
+; RV32D-NEXT:    vnsrl.wi v9, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
+; RV32D-NEXT:    vncvt.x.x.w v9, v9
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v9, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv1i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
+; RV64D-NEXT:    vand.vv v9, v8, v9
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
+; RV64D-NEXT:    vzext.vf4 v10, v9
+; RV64D-NEXT:    vfcvt.f.xu.v v9, v10
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 23
+; RV64D-NEXT:    vnsrl.wi v9, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf8, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
+; RV64D-NEXT:    vncvt.x.x.w v9, v9
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v9, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 1 x i8> @llvm.cttz.nxv1i8(<vscale x 1 x i8> %va, i1 false)
@@ -133,36 +133,36 @@ define <vscale x 2 x i8> @cttz_nxv2i8(<vscale x 2 x i8> %va) {
 ; RV32D-LABEL: cttz_nxv2i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
+; RV32D-NEXT:    vand.vv v9, v8, v9
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV32D-NEXT:    vzext.vf4 v9, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v9
+; RV32D-NEXT:    vzext.vf4 v10, v9
+; RV32D-NEXT:    vfcvt.f.xu.v v9, v10
 ; RV32D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v8, v8, 23
+; RV32D-NEXT:    vnsrl.wi v9, v9, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v8
+; RV32D-NEXT:    vncvt.x.x.w v9, v9
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v9, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv2i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
+; RV64D-NEXT:    vand.vv v9, v8, v9
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vzext.vf4 v9, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v9
+; RV64D-NEXT:    vzext.vf4 v10, v9
+; RV64D-NEXT:    vfcvt.f.xu.v v9, v10
 ; RV64D-NEXT:    vsetvli zero, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v8, v8, 23
+; RV64D-NEXT:    vnsrl.wi v9, v9, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
+; RV64D-NEXT:    vncvt.x.x.w v9, v9
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v9, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 2 x i8> @llvm.cttz.nxv2i8(<vscale x 2 x i8> %va, i1 false)
@@ -216,36 +216,36 @@ define <vscale x 4 x i8> @cttz_nxv4i8(<vscale x 4 x i8> %va) {
 ; RV32D-LABEL: cttz_nxv4i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
+; RV32D-NEXT:    vand.vv v9, v8, v9
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV32D-NEXT:    vzext.vf4 v10, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v10
+; RV32D-NEXT:    vzext.vf4 v10, v9
+; RV32D-NEXT:    vfcvt.f.xu.v v10, v10
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vnsrl.wi v10, v8, 23
+; RV32D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v10
+; RV32D-NEXT:    vncvt.x.x.w v9, v9
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v9, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv4i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
+; RV64D-NEXT:    vand.vv v9, v8, v9
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vzext.vf4 v10, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v10
+; RV64D-NEXT:    vzext.vf4 v10, v9
+; RV64D-NEXT:    vfcvt.f.xu.v v10, v10
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vnsrl.wi v10, v8, 23
+; RV64D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v10
+; RV64D-NEXT:    vncvt.x.x.w v9, v9
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v9, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 4 x i8> @llvm.cttz.nxv4i8(<vscale x 4 x i8> %va, i1 false)
@@ -299,36 +299,36 @@ define <vscale x 8 x i8> @cttz_nxv8i8(<vscale x 8 x i8> %va) {
 ; RV32D-LABEL: cttz_nxv8i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
+; RV32D-NEXT:    vand.vv v9, v8, v9
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV32D-NEXT:    vzext.vf4 v12, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v12
+; RV32D-NEXT:    vzext.vf4 v12, v9
+; RV32D-NEXT:    vfcvt.f.xu.v v12, v12
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vnsrl.wi v12, v8, 23
+; RV32D-NEXT:    vnsrl.wi v10, v12, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v12
+; RV32D-NEXT:    vncvt.x.x.w v9, v10
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v9, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv8i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
+; RV64D-NEXT:    vand.vv v9, v8, v9
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vzext.vf4 v12, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v12
+; RV64D-NEXT:    vzext.vf4 v12, v9
+; RV64D-NEXT:    vfcvt.f.xu.v v12, v12
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vnsrl.wi v12, v8, 23
+; RV64D-NEXT:    vnsrl.wi v10, v12, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v12
+; RV64D-NEXT:    vncvt.x.x.w v9, v10
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v9, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 8 x i8> @llvm.cttz.nxv8i8(<vscale x 8 x i8> %va, i1 false)
@@ -382,36 +382,36 @@ define <vscale x 16 x i8> @cttz_nxv16i8(<vscale x 16 x i8> %va) {
 ; RV32D-LABEL: cttz_nxv16i8:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v10
+; RV32D-NEXT:    vand.vv v10, v8, v10
 ; RV32D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; RV32D-NEXT:    vzext.vf4 v16, v8
-; RV32D-NEXT:    vfcvt.f.xu.v v8, v16
+; RV32D-NEXT:    vzext.vf4 v16, v10
+; RV32D-NEXT:    vfcvt.f.xu.v v16, v16
 ; RV32D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vnsrl.wi v16, v8, 23
+; RV32D-NEXT:    vnsrl.wi v12, v16, 23
 ; RV32D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV32D-NEXT:    vncvt.x.x.w v8, v16
+; RV32D-NEXT:    vncvt.x.x.w v10, v12
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
+; RV32D-NEXT:    vsub.vx v8, v10, a0
 ; RV32D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv16i8:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v10
+; RV64D-NEXT:    vand.vv v10, v8, v10
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m8, ta, mu
-; RV64D-NEXT:    vzext.vf4 v16, v8
-; RV64D-NEXT:    vfcvt.f.xu.v v8, v16
+; RV64D-NEXT:    vzext.vf4 v16, v10
+; RV64D-NEXT:    vfcvt.f.xu.v v16, v16
 ; RV64D-NEXT:    vsetvli zero, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vnsrl.wi v16, v8, 23
+; RV64D-NEXT:    vnsrl.wi v12, v16, 23
 ; RV64D-NEXT:    vsetvli zero, zero, e8, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v16
+; RV64D-NEXT:    vncvt.x.x.w v10, v12
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
+; RV64D-NEXT:    vsub.vx v8, v10, a0
 ; RV64D-NEXT:    vmerge.vim v8, v8, 8, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 16 x i8> @llvm.cttz.nxv16i8(<vscale x 16 x i8> %va, i1 false)
@@ -531,29 +531,29 @@ define <vscale x 1 x i16> @cttz_nxv1i16(<vscale x 1 x i16> %va) {
 ; RV32D-LABEL: cttz_nxv1i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v8, v9, 23
+; RV32D-NEXT:    vand.vv v9, v8, v9
+; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV32D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v9, v9, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv1i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, mf4, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v8, v9, 23
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV64D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 1 x i16> @llvm.cttz.nxv1i16(<vscale x 1 x i16> %va, i1 false)
   ret <vscale x 1 x i16> %a
@@ -620,29 +620,29 @@ define <vscale x 2 x i16> @cttz_nxv2i16(<vscale x 2 x i16> %va) {
 ; RV32D-LABEL: cttz_nxv2i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV32D-NEXT:    vnsrl.wi v8, v9, 23
+; RV32D-NEXT:    vand.vv v9, v8, v9
+; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV32D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v9, v9, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv2i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, mf2, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
-; RV64D-NEXT:    vnsrl.wi v8, v9, 23
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV64D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 2 x i16> @llvm.cttz.nxv2i16(<vscale x 2 x i16> %va, i1 false)
   ret <vscale x 2 x i16> %a
@@ -709,29 +709,29 @@ define <vscale x 4 x i16> @cttz_nxv4i16(<vscale x 4 x i16> %va) {
 ; RV32D-LABEL: cttz_nxv4i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v9, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v9
-; RV32D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV32D-NEXT:    vnsrl.wi v8, v10, 23
+; RV32D-NEXT:    vand.vv v9, v8, v9
+; RV32D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV32D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v9, v9, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv4i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, m1, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
-; RV64D-NEXT:    vnsrl.wi v8, v10, 23
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
+; RV64D-NEXT:    vnsrl.wi v9, v10, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 4 x i16> @llvm.cttz.nxv4i16(<vscale x 4 x i16> %va, i1 false)
   ret <vscale x 4 x i16> %a
@@ -798,29 +798,29 @@ define <vscale x 8 x i16> @cttz_nxv8i16(<vscale x 8 x i16> %va) {
 ; RV32D-LABEL: cttz_nxv8i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v10, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v10
-; RV32D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV32D-NEXT:    vnsrl.wi v8, v12, 23
+; RV32D-NEXT:    vand.vv v10, v8, v10
+; RV32D-NEXT:    vfwcvt.f.xu.v v12, v10
+; RV32D-NEXT:    vnsrl.wi v10, v12, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v10, v10, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v10, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv8i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, m2, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v10
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
-; RV64D-NEXT:    vnsrl.wi v8, v12, 23
+; RV64D-NEXT:    vand.vv v10, v8, v10
+; RV64D-NEXT:    vfwcvt.f.xu.v v12, v10
+; RV64D-NEXT:    vnsrl.wi v10, v12, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v10, v10, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v10, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 8 x i16> @llvm.cttz.nxv8i16(<vscale x 8 x i16> %va, i1 false)
   ret <vscale x 8 x i16> %a
@@ -887,29 +887,29 @@ define <vscale x 16 x i16> @cttz_nxv16i16(<vscale x 16 x i16> %va) {
 ; RV32D-LABEL: cttz_nxv16i16:
 ; RV32D:       # %bb.0:
 ; RV32D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV32D-NEXT:    vmseq.vx v0, v8, zero
 ; RV32D-NEXT:    vrsub.vi v12, v8, 0
-; RV32D-NEXT:    vand.vv v8, v8, v12
-; RV32D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV32D-NEXT:    vnsrl.wi v8, v16, 23
+; RV32D-NEXT:    vand.vv v12, v8, v12
+; RV32D-NEXT:    vfwcvt.f.xu.v v16, v12
+; RV32D-NEXT:    vnsrl.wi v12, v16, 23
 ; RV32D-NEXT:    li a0, 127
-; RV32D-NEXT:    vsub.vx v8, v8, a0
+; RV32D-NEXT:    vsub.vx v12, v12, a0
+; RV32D-NEXT:    vmseq.vi v0, v8, 0
 ; RV32D-NEXT:    li a0, 16
-; RV32D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV32D-NEXT:    vmerge.vxm v8, v12, a0, v0
 ; RV32D-NEXT:    ret
 ;
 ; RV64D-LABEL: cttz_nxv16i16:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e16, m4, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v12, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v12
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
-; RV64D-NEXT:    vnsrl.wi v8, v16, 23
+; RV64D-NEXT:    vand.vv v12, v8, v12
+; RV64D-NEXT:    vfwcvt.f.xu.v v16, v12
+; RV64D-NEXT:    vnsrl.wi v12, v16, 23
 ; RV64D-NEXT:    li a0, 127
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v12, v12, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 16
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v12, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 16 x i16> @llvm.cttz.nxv16i16(<vscale x 16 x i16> %va, i1 false)
   ret <vscale x 16 x i16> %a
@@ -1057,19 +1057,19 @@ define <vscale x 1 x i32> @cttz_nxv1i32(<vscale x 1 x i32> %va) {
 ; RV64D-LABEL: cttz_nxv1i32:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v9, v8
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
 ; RV64D-NEXT:    li a0, 52
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m1, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v9, a0
+; RV64D-NEXT:    vsrl.vx v9, v10, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, mf2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v8, v8
+; RV64D-NEXT:    vncvt.x.x.w v9, v9
 ; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v8, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 1 x i32> @llvm.cttz.nxv1i32(<vscale x 1 x i32> %va, i1 false)
   ret <vscale x 1 x i32> %a
@@ -1156,19 +1156,19 @@ define <vscale x 2 x i32> @cttz_nxv2i32(<vscale x 2 x i32> %va) {
 ; RV64D-LABEL: cttz_nxv2i32:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v9, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v9
-; RV64D-NEXT:    vfwcvt.f.xu.v v10, v8
+; RV64D-NEXT:    vand.vv v9, v8, v9
+; RV64D-NEXT:    vfwcvt.f.xu.v v10, v9
 ; RV64D-NEXT:    li a0, 52
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m2, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v10, a0
+; RV64D-NEXT:    vsrl.vx v10, v10, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m1, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v10, v8
+; RV64D-NEXT:    vncvt.x.x.w v9, v10
 ; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v10, a0
+; RV64D-NEXT:    vsub.vx v9, v9, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v9, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 2 x i32> @llvm.cttz.nxv2i32(<vscale x 2 x i32> %va, i1 false)
   ret <vscale x 2 x i32> %a
@@ -1255,19 +1255,19 @@ define <vscale x 4 x i32> @cttz_nxv4i32(<vscale x 4 x i32> %va) {
 ; RV64D-LABEL: cttz_nxv4i32:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v10, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v10
-; RV64D-NEXT:    vfwcvt.f.xu.v v12, v8
+; RV64D-NEXT:    vand.vv v10, v8, v10
+; RV64D-NEXT:    vfwcvt.f.xu.v v12, v10
 ; RV64D-NEXT:    li a0, 52
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m4, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v12, a0
+; RV64D-NEXT:    vsrl.vx v12, v12, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m2, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v12, v8
+; RV64D-NEXT:    vncvt.x.x.w v10, v12
 ; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v12, a0
+; RV64D-NEXT:    vsub.vx v10, v10, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v10, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 4 x i32> @llvm.cttz.nxv4i32(<vscale x 4 x i32> %va, i1 false)
   ret <vscale x 4 x i32> %a
@@ -1354,19 +1354,19 @@ define <vscale x 8 x i32> @cttz_nxv8i32(<vscale x 8 x i32> %va) {
 ; RV64D-LABEL: cttz_nxv8i32:
 ; RV64D:       # %bb.0:
 ; RV64D-NEXT:    vsetvli a0, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vmseq.vx v0, v8, zero
 ; RV64D-NEXT:    vrsub.vi v12, v8, 0
-; RV64D-NEXT:    vand.vv v8, v8, v12
-; RV64D-NEXT:    vfwcvt.f.xu.v v16, v8
+; RV64D-NEXT:    vand.vv v12, v8, v12
+; RV64D-NEXT:    vfwcvt.f.xu.v v16, v12
 ; RV64D-NEXT:    li a0, 52
 ; RV64D-NEXT:    vsetvli zero, zero, e64, m8, ta, mu
-; RV64D-NEXT:    vsrl.vx v8, v16, a0
+; RV64D-NEXT:    vsrl.vx v16, v16, a0
 ; RV64D-NEXT:    vsetvli zero, zero, e32, m4, ta, mu
-; RV64D-NEXT:    vncvt.x.x.w v16, v8
+; RV64D-NEXT:    vncvt.x.x.w v12, v16
 ; RV64D-NEXT:    li a0, 1023
-; RV64D-NEXT:    vsub.vx v8, v16, a0
+; RV64D-NEXT:    vsub.vx v12, v12, a0
+; RV64D-NEXT:    vmseq.vi v0, v8, 0
 ; RV64D-NEXT:    li a0, 32
-; RV64D-NEXT:    vmerge.vxm v8, v8, a0, v0
+; RV64D-NEXT:    vmerge.vxm v8, v12, a0, v0
 ; RV64D-NEXT:    ret
   %a = call <vscale x 8 x i32> @llvm.cttz.nxv8i32(<vscale x 8 x i32> %va, i1 false)
   ret <vscale x 8 x i32> %a

More information about the llvm-commits mailing list