[llvm] [SDAG] Construct constants via instructions if materialization is costly (PR #86659)

Wang Pengcheng via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 26 22:39:42 PDT 2024


https://github.com/wangpc-pp updated https://github.com/llvm/llvm-project/pull/86659

From a0c8ef0ef0324f3d52f3d9d271011a4a21874a8f Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Tue, 26 Mar 2024 17:50:27 +0800
Subject: [PATCH 1/2] [SDAG] Construct constants via instructions if
 materialization is costly

For some targets like RISCV, it is costly to materialize the constants
used in the lowering of `ISD::CTPOP`/`ISD::VP_CTPOP`.

We can query the materialization cost via
`TargetTransformInfo::getIntImmCost`, and if the cost is larger than 2,
we construct the constant from an already-materialized one with two
instructions (a shift plus an XOR/AND) instead.

This fixes #86207.
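
To illustrate why the derived masks are cheap, below is a minimal
standalone C++ sketch of the identities the expansion relies on,
assuming a 64-bit element for concreteness; the function and variable
names (`popcount64`, `Mask0F`, ...) are chosen for the example and are
not part of the patch. Starting from the single splat constant
0x0F0F..., each remaining mask costs one shift plus one XOR/AND:

// Standalone sketch of the mask identities used by the expansion
// (illustrative only; not the SelectionDAG code itself).
#include <cassert>
#include <cstdint>

int popcount64(uint64_t v) {
  const uint64_t Mask0F = 0x0F0F0F0F0F0F0F0FULL; // the only materialized splat
  const uint64_t Mask33 = Mask0F ^ (Mask0F << 2); // 0x3333333333333333
  const uint64_t Mask55 = Mask33 ^ (Mask33 << 1); // 0x5555555555555555
  const uint64_t Mask01 = Mask0F & (Mask0F >> 3); // 0x0101010101010101

  v = v - ((v >> 1) & Mask55);            // count bits in each 2-bit group
  v = (v & Mask33) + ((v >> 2) & Mask33); // sum into 4-bit groups
  v = (v + (v >> 4)) & Mask0F;            // sum into bytes
  return (v * Mask01) >> 56;              // total lands in the top byte
}

int main() {
  assert((0x0F0F0F0F0F0F0F0FULL ^ (0x0F0F0F0F0F0F0F0FULL << 2)) ==
         0x3333333333333333ULL);
  assert(popcount64(0xF0F0F0F0F0F0F0F0ULL) == 32);
  return 0;
}

This is why the lowering only materializes the 0x0F0F... splat and
builds the other masks from it when `getIntImmCost` reports a cost
above 2.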
---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |   79 +-
 llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll    |  288 +-
 llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll   |  156 +-
 llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll       | 1946 ++++---
 llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll    |  376 +-
 llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll        | 2134 ++++----
 .../RISCV/rvv/fixed-vectors-ctlz-vp.ll        | 4256 +++++++++-------
 .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll   |  140 +-
 .../RISCV/rvv/fixed-vectors-ctpop-vp.ll       | 1989 +++++---
 .../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll  |   78 +-
 .../RISCV/rvv/fixed-vectors-cttz-vp.ll        | 4464 ++++++++++-------
 .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll   |  188 +-
 12 files changed, 9235 insertions(+), 6859 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 8be03b66e155f6..566a76a09d783f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/VectorUtils.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/CodeGenCommonISel.h"
@@ -8666,14 +8667,32 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   if (VT.isVector() && !canExpandVectorCTPOP(*this, VT))
     return SDValue();
 
+  const auto &TLI = DAG.getTargetLoweringInfo();
+  const auto &TTI = TLI.getTargetMachine().getTargetTransformInfo(
+      DAG.getMachineFunction().getFunction());
+  Type *VTTy = VT.getScalarType().getTypeForEVT(*DAG.getContext());
+
   // This is the "best" algorithm from
   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-  SDValue Mask55 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+  // 0x0F0F0F0F...
+  const APInt &Constant0F = APInt::getSplat(Len, APInt(8, 0x0F));
+  SDValue Mask0F = DAG.getConstant(Constant0F, dl, VT);
+  // 0x33333333... = (0x0F0F0F0F... ^ (0x0F0F0F0F... << 2))
+  const APInt &Constant33 = APInt::getSplat(Len, APInt(8, 0x33));
   SDValue Mask33 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
-  SDValue Mask0F =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+      TTI.getIntImmCost(Constant33, VTTy, TargetTransformInfo::TCK_Latency) > 2
+          ? DAG.getNode(ISD::XOR, dl, VT, Mask0F,
+                        DAG.getNode(ISD::SHL, dl, VT, Mask0F,
+                                    DAG.getShiftAmountConstant(2, VT, dl)))
+          : DAG.getConstant(Constant33, dl, VT);
+  // 0x55555555... = (0x33333333... ^ (0x33333333... << 1))
+  const APInt &Constant55 = APInt::getSplat(Len, APInt(8, 0x55));
+  SDValue Mask55 =
+      TTI.getIntImmCost(Constant55, VTTy, TargetTransformInfo::TCK_Latency) > 2
+          ? DAG.getNode(ISD::XOR, dl, VT, Mask33,
+                        DAG.getNode(ISD::SHL, dl, VT, Mask33,
+                                    DAG.getShiftAmountConstant(1, VT, dl)))
+          : DAG.getConstant(Constant55, dl, VT);
 
   // v = v - ((v >> 1) & 0x55555555...)
   Op = DAG.getNode(ISD::SUB, dl, VT, Op,
@@ -8710,8 +8729,14 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   }
 
   // v = (v * 0x01010101...) >> (Len - 8)
+  // 0x01010101... == (0x0F0F0F0F... & (0x0F0F0F0F... >> 3))
+  const APInt &Constant01 = APInt::getSplat(Len, APInt(8, 0x01));
   SDValue Mask01 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+      TTI.getIntImmCost(Constant01, VTTy, TargetTransformInfo::TCK_Latency) > 2
+          ? DAG.getNode(ISD::AND, dl, VT, Mask0F,
+                        DAG.getNode(ISD::SRL, dl, VT, Mask0F,
+                                    DAG.getShiftAmountConstant(3, VT, dl)))
+          : DAG.getConstant(Constant01, dl, VT);
   return DAG.getNode(ISD::SRL, dl, VT,
                      DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
                      DAG.getConstant(Len - 8, dl, ShVT));
@@ -8731,14 +8756,36 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   if (!(Len <= 128 && Len % 8 == 0))
     return SDValue();
 
+  const auto &TLI = DAG.getTargetLoweringInfo();
+  const auto &TTI = TLI.getTargetMachine().getTargetTransformInfo(
+      DAG.getMachineFunction().getFunction());
+  Type *VTTy = VT.getScalarType().getTypeForEVT(*DAG.getContext());
+
   // This is same algorithm of expandCTPOP from
   // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
-  SDValue Mask55 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), dl, VT);
+  // 0x0F0F0F0F...
+  const APInt &Constant0F = APInt::getSplat(Len, APInt(8, 0x0F));
+  SDValue Mask0F = DAG.getConstant(Constant0F, dl, VT);
+  // 0x33333333... = (0x0F0F0F0F... ^ (0x0F0F0F0F... << 2))
+  const APInt &Constant33 = APInt::getSplat(Len, APInt(8, 0x33));
   SDValue Mask33 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), dl, VT);
-  SDValue Mask0F =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), dl, VT);
+      TTI.getIntImmCost(Constant33, VTTy, TargetTransformInfo::TCK_Latency) > 2
+          ? DAG.getNode(ISD::VP_XOR, dl, VT, Mask0F,
+                        DAG.getNode(ISD::VP_SHL, dl, VT, Mask0F,
+                                    DAG.getShiftAmountConstant(2, VT, dl), Mask,
+                                    VL),
+                        Mask, VL)
+          : DAG.getConstant(Constant33, dl, VT);
+  // 0x55555555... = (0x33333333... ^ (0x33333333... << 1))
+  const APInt &Constant55 = APInt::getSplat(Len, APInt(8, 0x55));
+  SDValue Mask55 =
+      TTI.getIntImmCost(Constant55, VTTy, TargetTransformInfo::TCK_Latency) > 2
+          ? DAG.getNode(ISD::VP_XOR, dl, VT, Mask33,
+                        DAG.getNode(ISD::VP_SHL, dl, VT, Mask33,
+                                    DAG.getShiftAmountConstant(1, VT, dl), Mask,
+                                    VL),
+                        Mask, VL)
+          : DAG.getConstant(Constant55, dl, VT);
 
   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
 
@@ -8767,8 +8814,16 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
     return Op;
 
   // v = (v * 0x01010101...) >> (Len - 8)
+  // 0x01010101... == (0x0F0F0F0F... & (0x0F0F0F0F... >> 3))
+  const APInt &Constant01 = APInt::getSplat(Len, APInt(8, 0x01));
   SDValue Mask01 =
-      DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), dl, VT);
+      TTI.getIntImmCost(Constant01, VTTy, TargetTransformInfo::TCK_Latency) > 2
+          ? DAG.getNode(ISD::VP_AND, dl, VT, Mask0F,
+                        DAG.getNode(ISD::VP_LSHR, dl, VT, Mask0F,
+                                    DAG.getShiftAmountConstant(3, VT, dl), Mask,
+                                    VL),
+                        Mask, VL)
+          : DAG.getConstant(Constant01, dl, VT);
   return DAG.getNode(ISD::VP_LSHR, dl, VT,
                      DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
                      DAG.getConstant(Len - 8, dl, ShVT), Mask, VL);
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
index fc94f8c2a52797..e6f033937ec286 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll
@@ -1147,30 +1147,24 @@ define <vscale x 1 x i64> @ctlz_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV32I-NEXT:    vor.vv v8, v8, v9
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    lui a0, 61681
+; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV32I-NEXT:    vmv.v.x v10, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v9, v9, v10
+; RV32I-NEXT:    vsll.vi v11, v10, 2
+; RV32I-NEXT:    vxor.vv v11, v10, v11
+; RV32I-NEXT:    vadd.vv v12, v11, v11
+; RV32I-NEXT:    vxor.vv v12, v11, v12
+; RV32I-NEXT:    vand.vv v9, v9, v12
 ; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v10, v8, v9
+; RV32I-NEXT:    vand.vv v9, v8, v11
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vadd.vv v8, v10, v8
+; RV32I-NEXT:    vand.vv v8, v8, v11
+; RV32I-NEXT:    vadd.vv v8, v9, v8
 ; RV32I-NEXT:    vsrl.vi v9, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    vand.vv v8, v8, v10
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
@@ -1288,30 +1282,24 @@ define <vscale x 2 x i64> @ctlz_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV32I-NEXT:    vor.vv v8, v8, v10
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    lui a0, 61681
+; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV32I-NEXT:    vmv.v.x v12, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v10, v10, v12
+; RV32I-NEXT:    vsll.vi v14, v12, 2
+; RV32I-NEXT:    vxor.vv v14, v12, v14
+; RV32I-NEXT:    vadd.vv v16, v14, v14
+; RV32I-NEXT:    vxor.vv v16, v14, v16
+; RV32I-NEXT:    vand.vv v10, v10, v16
 ; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v12, v8, v10
+; RV32I-NEXT:    vand.vv v10, v8, v14
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vadd.vv v8, v12, v8
+; RV32I-NEXT:    vand.vv v8, v8, v14
+; RV32I-NEXT:    vadd.vv v8, v10, v8
 ; RV32I-NEXT:    vsrl.vi v10, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vand.vv v8, v8, v12
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -1429,30 +1417,24 @@ define <vscale x 4 x i64> @ctlz_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV32I-NEXT:    vor.vv v8, v8, v12
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    lui a0, 61681
+; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV32I-NEXT:    vmv.v.x v16, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v12, v12, v16
+; RV32I-NEXT:    vsll.vi v20, v16, 2
+; RV32I-NEXT:    vxor.vv v20, v16, v20
+; RV32I-NEXT:    vadd.vv v24, v20, v20
+; RV32I-NEXT:    vxor.vv v24, v20, v24
+; RV32I-NEXT:    vand.vv v12, v12, v24
 ; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v16, v8, v12
+; RV32I-NEXT:    vand.vv v12, v8, v20
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vadd.vv v8, v16, v8
+; RV32I-NEXT:    vand.vv v8, v8, v20
+; RV32I-NEXT:    vadd.vv v8, v12, v8
 ; RV32I-NEXT:    vsrl.vi v12, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vand.vv v8, v8, v16
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
@@ -1554,6 +1536,12 @@ declare <vscale x 4 x i64> @llvm.ctlz.nxv4i64(<vscale x 4 x i64>, i1)
 define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-LABEL: ctlz_nxv8i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    sub sp, sp, a0
+; RV32I-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; RV32I-NEXT:    vsrl.vi v16, v8, 1
 ; RV32I-NEXT:    vor.vv v8, v8, v16
@@ -1569,31 +1557,39 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-NEXT:    vsrl.vx v16, v8, a0
 ; RV32I-NEXT:    vor.vv v8, v8, v16
 ; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v24, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v16, v16, v24
-; RV32I-NEXT:    vsub.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v16, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v24, v8, v16
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v16
-; RV32I-NEXT:    vadd.vv v8, v24, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v16
+; RV32I-NEXT:    addi a0, sp, 16
+; RV32I-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT:    vsrl.vi v8, v8, 1
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    addi a0, a0, 16
+; RV32I-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v16, a0
+; RV32I-NEXT:    vmv.v.x v8, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vsll.vi v0, v8, 2
+; RV32I-NEXT:    vxor.vv v0, v8, v0
+; RV32I-NEXT:    vadd.vv v24, v0, v0
+; RV32I-NEXT:    vxor.vv v24, v0, v24
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    addi a0, a0, 16
+; RV32I-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT:    vand.vv v24, v16, v24
+; RV32I-NEXT:    addi a0, sp, 16
+; RV32I-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT:    vsub.vv v16, v16, v24
+; RV32I-NEXT:    vand.vv v24, v16, v0
+; RV32I-NEXT:    vsrl.vi v16, v16, 2
+; RV32I-NEXT:    vand.vv v16, v16, v0
+; RV32I-NEXT:    vadd.vv v16, v24, v16
+; RV32I-NEXT:    vsrl.vi v24, v16, 4
+; RV32I-NEXT:    vadd.vv v16, v16, v24
+; RV32I-NEXT:    vand.vv v8, v16, v8
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
@@ -1602,6 +1598,10 @@ define <vscale x 8 x i64> @ctlz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-NEXT:    vmul.vv v8, v8, v16
 ; RV32I-NEXT:    li a0, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a0
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: ctlz_nxv8i64:
@@ -2753,30 +2753,24 @@ define <vscale x 1 x i64> @ctlz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV32I-NEXT:    vor.vv v8, v8, v9
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    lui a0, 61681
+; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV32I-NEXT:    vmv.v.x v10, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v9, v9, v10
+; RV32I-NEXT:    vsll.vi v11, v10, 2
+; RV32I-NEXT:    vxor.vv v11, v10, v11
+; RV32I-NEXT:    vadd.vv v12, v11, v11
+; RV32I-NEXT:    vxor.vv v12, v11, v12
+; RV32I-NEXT:    vand.vv v9, v9, v12
 ; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v10, v8, v9
+; RV32I-NEXT:    vand.vv v9, v8, v11
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vadd.vv v8, v10, v8
+; RV32I-NEXT:    vand.vv v8, v8, v11
+; RV32I-NEXT:    vadd.vv v8, v9, v8
 ; RV32I-NEXT:    vsrl.vi v9, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v9
+; RV32I-NEXT:    vand.vv v8, v8, v10
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
@@ -2889,30 +2883,24 @@ define <vscale x 2 x i64> @ctlz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV32I-NEXT:    vor.vv v8, v8, v10
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    lui a0, 61681
+; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV32I-NEXT:    vmv.v.x v12, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v10, v10, v12
+; RV32I-NEXT:    vsll.vi v14, v12, 2
+; RV32I-NEXT:    vxor.vv v14, v12, v14
+; RV32I-NEXT:    vadd.vv v16, v14, v14
+; RV32I-NEXT:    vxor.vv v16, v14, v16
+; RV32I-NEXT:    vand.vv v10, v10, v16
 ; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v12, v8, v10
+; RV32I-NEXT:    vand.vv v10, v8, v14
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vadd.vv v8, v12, v8
+; RV32I-NEXT:    vand.vv v8, v8, v14
+; RV32I-NEXT:    vadd.vv v8, v10, v8
 ; RV32I-NEXT:    vsrl.vi v10, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vand.vv v8, v8, v12
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
@@ -3025,30 +3013,24 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV32I-NEXT:    vor.vv v8, v8, v12
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
+; RV32I-NEXT:    lui a0, 61681
+; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV32I-NEXT:    vmv.v.x v16, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v12, v12, v16
+; RV32I-NEXT:    vsll.vi v20, v16, 2
+; RV32I-NEXT:    vxor.vv v20, v16, v20
+; RV32I-NEXT:    vadd.vv v24, v20, v20
+; RV32I-NEXT:    vxor.vv v24, v20, v24
+; RV32I-NEXT:    vand.vv v12, v12, v24
 ; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v16, v8, v12
+; RV32I-NEXT:    vand.vv v12, v8, v20
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vadd.vv v8, v16, v8
+; RV32I-NEXT:    vand.vv v8, v8, v20
+; RV32I-NEXT:    vadd.vv v8, v12, v8
 ; RV32I-NEXT:    vsrl.vi v12, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 61681
-; RV32I-NEXT:    addi a0, a0, -241
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vand.vv v8, v8, v16
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
@@ -3145,6 +3127,12 @@ define <vscale x 4 x i64> @ctlz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-LABEL: ctlz_zero_undef_nxv8i64:
 ; RV32I:       # %bb.0:
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    sub sp, sp, a0
+; RV32I-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
 ; RV32I-NEXT:    vsrl.vi v16, v8, 1
 ; RV32I-NEXT:    vor.vv v8, v8, v16
@@ -3160,31 +3148,39 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-NEXT:    vsrl.vx v16, v8, a0
 ; RV32I-NEXT:    vor.vv v8, v8, v16
 ; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v24, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v16, v16, v24
-; RV32I-NEXT:    vsub.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v16, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v24, v8, v16
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v16
-; RV32I-NEXT:    vadd.vv v8, v24, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v16
+; RV32I-NEXT:    addi a0, sp, 16
+; RV32I-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT:    vsrl.vi v8, v8, 1
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    addi a0, a0, 16
+; RV32I-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v16, a0
+; RV32I-NEXT:    vmv.v.x v8, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vsll.vi v0, v8, 2
+; RV32I-NEXT:    vxor.vv v0, v8, v0
+; RV32I-NEXT:    vadd.vv v24, v0, v0
+; RV32I-NEXT:    vxor.vv v24, v0, v24
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    addi a0, a0, 16
+; RV32I-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT:    vand.vv v24, v16, v24
+; RV32I-NEXT:    addi a0, sp, 16
+; RV32I-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT:    vsub.vv v16, v16, v24
+; RV32I-NEXT:    vand.vv v24, v16, v0
+; RV32I-NEXT:    vsrl.vi v16, v16, 2
+; RV32I-NEXT:    vand.vv v16, v16, v0
+; RV32I-NEXT:    vadd.vv v16, v24, v16
+; RV32I-NEXT:    vsrl.vi v24, v16, 4
+; RV32I-NEXT:    vadd.vv v16, v16, v24
+; RV32I-NEXT:    vand.vv v8, v16, v8
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
@@ -3193,6 +3189,10 @@ define <vscale x 8 x i64> @ctlz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-NEXT:    vmul.vv v8, v8, v16
 ; RV32I-NEXT:    li a0, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a0
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: ctlz_zero_undef_nxv8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
index c310274d685081..16a8052e821ad3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-sdnode.ll
@@ -675,31 +675,24 @@ declare <vscale x 16 x i32> @llvm.ctpop.nxv16i32(<vscale x 16 x i32>)
 define <vscale x 1 x i64> @ctpop_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV32-LABEL: ctpop_nxv1i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a0
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a0
-; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
 ; RV32-NEXT:    lui a0, 61681
 ; RV32-NEXT:    addi a0, a0, -241
 ; RV32-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a0
 ; RV32-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2
+; RV32-NEXT:    vxor.vv v10, v9, v10
+; RV32-NEXT:    vadd.vv v11, v10, v10
+; RV32-NEXT:    vxor.vv v11, v10, v11
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vv v11, v12, v11
+; RV32-NEXT:    vsub.vv v8, v8, v11
+; RV32-NEXT:    vand.vv v11, v8, v10
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vadd.vv v8, v11, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    vand.vv v8, v8, v9
 ; RV32-NEXT:    lui a0, 4112
 ; RV32-NEXT:    addi a0, a0, 257
@@ -758,31 +751,24 @@ declare <vscale x 1 x i64> @llvm.ctpop.nxv1i64(<vscale x 1 x i64>)
 define <vscale x 2 x i64> @ctpop_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV32-LABEL: ctpop_nxv2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a0
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a0
-; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    lui a0, 61681
 ; RV32-NEXT:    addi a0, a0, -241
 ; RV32-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a0
 ; RV32-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2
+; RV32-NEXT:    vxor.vv v12, v10, v12
+; RV32-NEXT:    vadd.vv v14, v12, v12
+; RV32-NEXT:    vxor.vv v14, v12, v14
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vand.vv v14, v16, v14
+; RV32-NEXT:    vsub.vv v8, v8, v14
+; RV32-NEXT:    vand.vv v14, v8, v12
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vadd.vv v8, v14, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    vand.vv v8, v8, v10
 ; RV32-NEXT:    lui a0, 4112
 ; RV32-NEXT:    addi a0, a0, 257
@@ -841,31 +827,24 @@ declare <vscale x 2 x i64> @llvm.ctpop.nxv2i64(<vscale x 2 x i64>)
 define <vscale x 4 x i64> @ctpop_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV32-LABEL: ctpop_nxv4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a0
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a0
-; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    lui a0, 61681
 ; RV32-NEXT:    addi a0, a0, -241
 ; RV32-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a0
 ; RV32-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2
+; RV32-NEXT:    vxor.vv v16, v12, v16
+; RV32-NEXT:    vadd.vv v20, v16, v16
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    vsrl.vi v24, v8, 1
+; RV32-NEXT:    vand.vv v20, v24, v20
+; RV32-NEXT:    vsub.vv v8, v8, v20
+; RV32-NEXT:    vand.vv v20, v8, v16
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vadd.vv v8, v20, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    vand.vv v8, v8, v12
 ; RV32-NEXT:    lui a0, 4112
 ; RV32-NEXT:    addi a0, a0, 257
@@ -924,31 +903,44 @@ declare <vscale x 4 x i64> @llvm.ctpop.nxv4i64(<vscale x 4 x i64>)
 define <vscale x 8 x i64> @ctpop_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32-LABEL: ctpop_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a0, 349525
-; RV32-NEXT:    addi a0, a0, 1365
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a0
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a0, 209715
-; RV32-NEXT:    addi a0, a0, 819
-; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a0
-; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    sub sp, sp, a0
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a0, 61681
 ; RV32-NEXT:    addi a0, a0, -241
 ; RV32-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.x v16, a0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v16
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16
 ; RV32-NEXT:    lui a0, 4112
 ; RV32-NEXT:    addi a0, a0, 257
@@ -958,6 +950,10 @@ define <vscale x 8 x i64> @ctpop_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: ctpop_nxv8i64:
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
index 2310f85b1fba93..fc7f50e4048666 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -1221,37 +1221,27 @@ declare <vscale x 1 x i64> @llvm.vp.ctpop.nxv1i64(<vscale x 1 x i64>, <vscale x
 define <vscale x 1 x i64> @vp_ctpop_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv1i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV32-NEXT:    vxor.vv v10, v9, v10, v0.t
+; RV32-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV32-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV32-NEXT:    vand.vv v11, v8, v10, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3, v0.t
+; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -1259,34 +1249,30 @@ define <vscale x 1 x i64> @vp_ctpop_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
 ;
 ; RV64-LABEL: vp_ctpop_nxv1i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV64-NEXT:    vxor.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV64-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV64-NEXT:    vand.vv v11, v8, v10, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v9, v9, 3, v0.t
+; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1303,37 +1289,27 @@ define <vscale x 1 x i64> @vp_ctpop_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
 define <vscale x 1 x i64> @vp_ctpop_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv1i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2
+; RV32-NEXT:    vxor.vv v10, v9, v10
+; RV32-NEXT:    vadd.vv v11, v10, v10
+; RV32-NEXT:    vxor.vv v11, v10, v11
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vv v11, v12, v11
+; RV32-NEXT:    vsub.vv v8, v8, v11
+; RV32-NEXT:    vand.vv v11, v8, v10
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vadd.vv v8, v11, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3
+; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1341,34 +1317,30 @@ define <vscale x 1 x i64> @vp_ctpop_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
 ;
 ; RV64-LABEL: vp_ctpop_nxv1i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0
+; RV64-NEXT:    vsll.vi v10, v9, 2
+; RV64-NEXT:    vxor.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v11, v10, v10
+; RV64-NEXT:    vxor.vv v11, v10, v11
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vv v11, v12, v11
+; RV64-NEXT:    vsub.vv v8, v8, v11
+; RV64-NEXT:    vand.vv v11, v8, v10
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v10
+; RV64-NEXT:    vadd.vv v8, v11, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v10
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v9, v9, 3
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1389,37 +1361,27 @@ declare <vscale x 2 x i64> @llvm.vp.ctpop.nxv2i64(<vscale x 2 x i64>, <vscale x
 define <vscale x 2 x i64> @vp_ctpop_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV32-NEXT:    vxor.vv v12, v10, v12, v0.t
+; RV32-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV32-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV32-NEXT:    vand.vv v14, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3, v0.t
+; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -1427,34 +1389,30 @@ define <vscale x 2 x i64> @vp_ctpop_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
 ;
 ; RV64-LABEL: vp_ctpop_nxv2i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV64-NEXT:    vxor.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV64-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV64-NEXT:    vand.vv v14, v8, v12, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v10, v10, 3, v0.t
+; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1471,37 +1429,27 @@ define <vscale x 2 x i64> @vp_ctpop_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
 define <vscale x 2 x i64> @vp_ctpop_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2
+; RV32-NEXT:    vxor.vv v12, v10, v12
+; RV32-NEXT:    vadd.vv v14, v12, v12
+; RV32-NEXT:    vxor.vv v14, v12, v14
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vand.vv v14, v16, v14
+; RV32-NEXT:    vsub.vv v8, v8, v14
+; RV32-NEXT:    vand.vv v14, v8, v12
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vadd.vv v8, v14, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3
+; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1509,34 +1457,30 @@ define <vscale x 2 x i64> @vp_ctpop_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
 ;
 ; RV64-LABEL: vp_ctpop_nxv2i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vsll.vi v12, v10, 2
+; RV64-NEXT:    vxor.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v14, v12, v12
+; RV64-NEXT:    vxor.vv v14, v12, v14
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vv v14, v16, v14
+; RV64-NEXT:    vsub.vv v8, v8, v14
+; RV64-NEXT:    vand.vv v14, v8, v12
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v12
+; RV64-NEXT:    vadd.vv v8, v14, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v12
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v10, v10, 3
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v8, v8, v10
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1557,37 +1501,27 @@ declare <vscale x 4 x i64> @llvm.vp.ctpop.nxv4i64(<vscale x 4 x i64>, <vscale x
 define <vscale x 4 x i64> @vp_ctpop_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV32-NEXT:    vxor.vv v16, v12, v16, v0.t
+; RV32-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV32-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV32-NEXT:    vand.vv v20, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3, v0.t
+; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -1595,34 +1529,30 @@ define <vscale x 4 x i64> @vp_ctpop_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
 ;
 ; RV64-LABEL: vp_ctpop_nxv4i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV64-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV64-NEXT:    vand.vv v20, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v12, v12, 3, v0.t
+; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1639,37 +1569,27 @@ define <vscale x 4 x i64> @vp_ctpop_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
 define <vscale x 4 x i64> @vp_ctpop_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2
+; RV32-NEXT:    vxor.vv v16, v12, v16
+; RV32-NEXT:    vadd.vv v20, v16, v16
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    vsrl.vi v24, v8, 1
+; RV32-NEXT:    vand.vv v20, v24, v20
+; RV32-NEXT:    vsub.vv v8, v8, v20
+; RV32-NEXT:    vand.vv v20, v8, v16
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vadd.vv v8, v20, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3
+; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1677,34 +1597,30 @@ define <vscale x 4 x i64> @vp_ctpop_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
 ;
 ; RV64-LABEL: vp_ctpop_nxv4i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0
+; RV64-NEXT:    vsll.vi v16, v12, 2
+; RV64-NEXT:    vxor.vx v16, v16, a1
+; RV64-NEXT:    vadd.vv v20, v16, v16
+; RV64-NEXT:    vxor.vv v20, v16, v20
+; RV64-NEXT:    vsrl.vi v24, v8, 1
+; RV64-NEXT:    vand.vv v20, v24, v20
+; RV64-NEXT:    vsub.vv v8, v8, v20
+; RV64-NEXT:    vand.vv v20, v8, v16
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vadd.vv v8, v20, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v12, v12, 3
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vmul.vv v8, v8, v12
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1725,74 +1641,163 @@ declare <vscale x 7 x i64> @llvm.vp.ctpop.nxv7i64(<vscale x 7 x i64>, <vscale x
 define <vscale x 7 x i64> @vp_ctpop_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv7i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a1
+; RV32-NEXT:    vmv.v.x v8, a1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    vsll.vi v24, v8, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v8, v24, v0.t
+; RV32-NEXT:    vsll.vi v8, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v8, v24, v8, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv7i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v24, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 1, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v24, v24, v16, v0.t
+; RV64-NEXT:    vand.vv v16, v24, v8, v0.t
+; RV64-NEXT:    vsrl.vi v24, v24, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v24, v8, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64:
@@ -1807,74 +1812,98 @@ define <vscale x 7 x i64> @vp_ctpop_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
 define <vscale x 7 x i64> @vp_ctpop_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv7i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.x v16, a1
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v16
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv7i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv7i64_unmasked:
@@ -1893,74 +1922,163 @@ declare <vscale x 8 x i64> @llvm.vp.ctpop.nxv8i64(<vscale x 8 x i64>, <vscale x
 define <vscale x 8 x i64> @vp_ctpop_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a1
+; RV32-NEXT:    vmv.v.x v8, a1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    vsll.vi v24, v8, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v8, v24, v0.t
+; RV32-NEXT:    vsll.vi v8, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v8, v24, v8, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv8i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v24, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 1, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v24, v24, v16, v0.t
+; RV64-NEXT:    vand.vv v16, v24, v8, v0.t
+; RV64-NEXT:    vsrl.vi v24, v24, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v24, v8, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64:
@@ -1975,74 +2093,98 @@ define <vscale x 8 x i64> @vp_ctpop_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
 define <vscale x 8 x i64> @vp_ctpop_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_nxv8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.x v16, a1
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v16
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv8i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv8i64_unmasked:
@@ -2064,20 +2206,19 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV32-NEXT:    vmv1r.v v24, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    li a2, 24
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 48
-; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    slli a1, a1, 5
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -2089,101 +2230,121 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV32-NEXT:    sltu a3, a0, a2
 ; RV32-NEXT:    addi a3, a3, -1
 ; RV32-NEXT:    and a2, a3, a2
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a3, 349525
-; RV32-NEXT:    addi a3, a3, 1365
+; RV32-NEXT:    lui a3, 61681
+; RV32-NEXT:    addi a3, a3, -241
 ; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a3
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vmv.v.x v16, a3
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    li a4, 40
 ; RV32-NEXT:    mul a3, a3, a4
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 5
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    lui a3, 209715
-; RV32-NEXT:    addi a3, a3, 819
-; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a3
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 5
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 5
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT:    lui a3, 61681
-; RV32-NEXT:    addi a3, a3, -241
-; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a3
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 5
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
 ; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    lui a3, 4112
-; RV32-NEXT:    addi a3, a3, 257
-; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a3
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    csrr a2, vlenb
 ; RV32-NEXT:    slli a2, a2, 4
 ; RV32-NEXT:    add a2, sp, a2
 ; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    li a2, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 3
+; RV32-NEXT:    li a4, 24
+; RV32-NEXT:    mul a3, a3, a4
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
@@ -2194,98 +2355,112 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
+; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    addi a0, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, sp, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    li a1, 48
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    addi sp, sp, 16
@@ -2296,82 +2471,242 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
+; RV64-NEXT:    vmv1r.v v24, v0
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a2, a1, 3
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    srli a1, a2, 3
 ; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT:    vslidedown.vx v24, v0, a2
-; RV64-NEXT:    mv a2, a0
-; RV64-NEXT:    bltu a0, a1, .LBB46_2
-; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, a1
-; RV64-NEXT:  .LBB46_2:
-; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a2, 349525
-; RV64-NEXT:    addiw a2, a2, 1365
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    vslidedown.vx v0, v0, a1
+; RV64-NEXT:    sub a1, a0, a2
+; RV64-NEXT:    sltu a3, a0, a1
+; RV64-NEXT:    addi a3, a3, -1
+; RV64-NEXT:    and a3, a3, a1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a4, a1, 32
+; RV64-NEXT:    add a1, a1, a4
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    li a5, 40
+; RV64-NEXT:    mul a4, a4, a5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v16, 1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6, v0.t
-; RV64-NEXT:    addi a7, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
-; RV64-NEXT:    sub a1, a0, a1
-; RV64-NEXT:    sltu a0, a0, a1
-; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a1
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    li a5, 24
+; RV64-NEXT:    mul a4, a4, a5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    bltu a0, a2, .LBB46_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    mv a0, a2
+; RV64-NEXT:  .LBB46_2:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v16, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 3
 ; RV64-NEXT:    add a0, sp, a0
 ; RV64-NEXT:    addi a0, a0, 16
 ; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vsub.vv v16, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v16, a3, v0.t
-; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a3, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a6, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    addi a0, sp, 16
 ; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 48
+; RV64-NEXT:    mul a0, a0, a1
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
@@ -2407,162 +2742,215 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    sub a2, a0, a1
 ; RV32-NEXT:    sltu a3, a0, a2
 ; RV32-NEXT:    addi a3, a3, -1
 ; RV32-NEXT:    and a2, a3, a2
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v24, v16, 1
-; RV32-NEXT:    lui a3, 349525
-; RV32-NEXT:    addi a3, a3, 1365
-; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v0, a3
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v24, v24, v0
-; RV32-NEXT:    vsub.vv v24, v16, v24
-; RV32-NEXT:    lui a3, 209715
-; RV32-NEXT:    addi a3, a3, 819
-; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v0, a3
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v24, v0
-; RV32-NEXT:    vsrl.vi v24, v24, 2
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v24, v24, v0
-; RV32-NEXT:    vadd.vv v24, v16, v24
-; RV32-NEXT:    vsrl.vi v16, v24, 4
-; RV32-NEXT:    vadd.vv v16, v24, v16
 ; RV32-NEXT:    lui a3, 61681
 ; RV32-NEXT:    addi a3, a3, -241
 ; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.x v24, a3
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v8, v24, v0
+; RV32-NEXT:    vadd.vv v0, v8, v8
+; RV32-NEXT:    vxor.vv v16, v8, v0
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v0, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v0, v0, 1
+; RV32-NEXT:    vand.vv v16, v0, v16
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v0, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v0, v16
+; RV32-NEXT:    vand.vv v0, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v16, 2
+; RV32-NEXT:    vand.vv v8, v16, v8
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
+; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a2
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v24, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    lui a3, 4112
-; RV32-NEXT:    addi a3, a3, 257
-; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a3
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
-; RV32-NEXT:    vmul.vv v16, v16, v24
-; RV32-NEXT:    li a2, 56
-; RV32-NEXT:    vsrl.vx v16, v16, a2
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    bltu a0, a1, .LBB47_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    mv a0, a1
 ; RV32-NEXT:  .LBB47_2:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v24, v8, 1
+; RV32-NEXT:    vsll.vi v8, v24, 2
+; RV32-NEXT:    vxor.vv v8, v24, v8
+; RV32-NEXT:    vadd.vv v0, v8, v8
+; RV32-NEXT:    vxor.vv v16, v8, v0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v0
-; RV32-NEXT:    vsub.vv v24, v8, v24
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v0, v16, 1
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v0, v16
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v24, v0
-; RV32-NEXT:    vsrl.vi v24, v24, 2
-; RV32-NEXT:    vand.vv v24, v24, v0
-; RV32-NEXT:    vadd.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    vsub.vv v16, v0, v16
+; RV32-NEXT:    vand.vv v0, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v16, 2
+; RV32-NEXT:    vand.vv v8, v16, v8
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
+; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vx v8, v8, a2
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v24
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vx v8, v8, a2
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_nxv16i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    mv a2, a0
-; RV64-NEXT:    bltu a0, a1, .LBB47_2
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    sub a1, a0, a2
+; RV64-NEXT:    sltu a3, a0, a1
+; RV64-NEXT:    addi a3, a3, -1
+; RV64-NEXT:    and a3, a3, a1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a4, a1, 32
+; RV64-NEXT:    add a1, a1, a4
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v0, v8, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v16, v0, v24
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v24, v8, 1
+; RV64-NEXT:    vand.vv v16, v24, v16
+; RV64-NEXT:    vsub.vv v16, v8, v16
+; RV64-NEXT:    vand.vv v24, v16, v0
+; RV64-NEXT:    vsrl.vi v16, v16, 2
+; RV64-NEXT:    vand.vv v16, v16, v0
+; RV64-NEXT:    vadd.vv v16, v24, v16
+; RV64-NEXT:    vsrl.vi v24, v16, 4
+; RV64-NEXT:    vadd.vv v16, v16, v24
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v24, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vmv8r.v v8, v24
+; RV64-NEXT:    vsrl.vi v24, v24, 3
+; RV64-NEXT:    vand.vx v24, v24, a1
+; RV64-NEXT:    vmul.vv v16, v16, v24
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsrl.vx v16, v16, a3
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    bltu a0, a2, .LBB47_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a2, a1
+; RV64-NEXT:    mv a0, a2
 ; RV64-NEXT:  .LBB47_2:
-; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    lui a2, 349525
-; RV64-NEXT:    addiw a2, a2, 1365
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vsub.vv v8, v8, v24
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v24, v8, a3
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vadd.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v24
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6
-; RV64-NEXT:    sub a1, a0, a1
-; RV64-NEXT:    sltu a0, a0, a1
-; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vsub.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v16, a3
+; RV64-NEXT:    vsll.vi v24, v8, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v16, v24, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v0, v8, 1
+; RV64-NEXT:    vand.vv v16, v0, v16
+; RV64-NEXT:    vsub.vv v16, v8, v16
+; RV64-NEXT:    vand.vv v0, v16, v24
 ; RV64-NEXT:    vsrl.vi v16, v16, 2
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vadd.vv v16, v24, v16
+; RV64-NEXT:    vand.vv v16, v16, v24
+; RV64-NEXT:    vadd.vv v16, v0, v16
 ; RV64-NEXT:    vsrl.vi v24, v16, 4
 ; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vmul.vx v16, v16, a5
-; RV64-NEXT:    vsrl.vx v16, v16, a6
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v8, 3
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vmul.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vx v8, v8, a3
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_ctpop_nxv16i64_unmasked:
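(Aside, not part of the patch: the regenerated ctpop checks above derive the remaining popcount masks from the single 0x0F0F... splat via a shift plus xor/and. A small standalone C++ sketch, under that assumption only, of the identities the new vsll.vi/vxor, vadd.vv/vxor and vsrl.vi 3/vand sequences rely on:)

  #include <cassert>
  #include <cstdint>

  int main() {
    // The one splat constant the lowered code still materializes.
    const uint64_t F = 0x0F0F0F0F0F0F0F0FULL;

    // 0x33... mask: F ^ (F << 2), matching the vsll.vi 2 / vxor sequences.
    const uint64_t C33 = F ^ (F << 2);
    assert(C33 == 0x3333333333333333ULL);

    // 0x55... mask: C33 ^ (C33 << 1); the unmasked paths use vadd.vv x,x for the shift by 1.
    const uint64_t C55 = C33 ^ (C33 << 1);
    assert(C55 == 0x5555555555555555ULL);

    // 0x0101... multiplier: F & (F >> 3), matching the vsrl.vi 3 / vand sequences.
    const uint64_t C01 = F & (F >> 3);
    assert(C01 == 0x0101010101010101ULL);

    // Reference popcount using the derived masks (the usual parallel bit count).
    const uint64_t x = 0x123456789ABCDEF0ULL;
    uint64_t v = x - ((x >> 1) & C55);
    v = (v & C33) + ((v >> 2) & C33);
    v = (v + (v >> 4)) & F;
    const unsigned count = static_cast<unsigned>((v * C01) >> 56);
    assert(count == static_cast<unsigned>(__builtin_popcountll(x)));
    return 0;
  }
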
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
index b14cde25aa85b2..7e0285e7a0d64a 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-sdnode.ll
@@ -1135,35 +1135,28 @@ declare <vscale x 16 x i32> @llvm.cttz.nxv16i32(<vscale x 16 x i32>, i1)
 define <vscale x 1 x i64> @cttz_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV32I-LABEL: cttz_nxv1i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v9, v9, v10
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v10, v8, v9
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV32I-NEXT:    vmv.v.x v9, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT:    vsll.vi v10, v9, 2
+; RV32I-NEXT:    vxor.vv v10, v9, v10
+; RV32I-NEXT:    vadd.vv v11, v10, v10
+; RV32I-NEXT:    vxor.vv v11, v10, v11
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsub.vx v12, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vsrl.vi v12, v8, 1
+; RV32I-NEXT:    vand.vv v11, v12, v11
+; RV32I-NEXT:    vsub.vv v8, v8, v11
+; RV32I-NEXT:    vand.vv v11, v8, v10
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vadd.vv v8, v11, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    vand.vv v8, v8, v9
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
@@ -1298,35 +1291,28 @@ declare <vscale x 1 x i64> @llvm.cttz.nxv1i64(<vscale x 1 x i64>, i1)
 define <vscale x 2 x i64> @cttz_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV32I-LABEL: cttz_nxv2i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vsub.vx v10, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v10, v10, v12
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v12, v8, v10
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV32I-NEXT:    vmv.v.x v10, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT:    vsll.vi v12, v10, 2
+; RV32I-NEXT:    vxor.vv v12, v10, v12
+; RV32I-NEXT:    vadd.vv v14, v12, v12
+; RV32I-NEXT:    vxor.vv v14, v12, v14
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsub.vx v16, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    vand.vv v14, v16, v14
+; RV32I-NEXT:    vsub.vv v8, v8, v14
+; RV32I-NEXT:    vand.vv v14, v8, v12
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vadd.vv v8, v14, v8
+; RV32I-NEXT:    vsrl.vi v12, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v12
 ; RV32I-NEXT:    vand.vv v8, v8, v10
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
@@ -1461,35 +1447,28 @@ declare <vscale x 2 x i64> @llvm.cttz.nxv2i64(<vscale x 2 x i64>, i1)
 define <vscale x 4 x i64> @cttz_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV32I-LABEL: cttz_nxv4i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vsub.vx v12, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vmv.v.x v16, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v12, v12, v16
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v16, v8, v12
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vadd.vv v8, v16, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV32I-NEXT:    vmv.v.x v12, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT:    vsll.vi v16, v12, 2
+; RV32I-NEXT:    vxor.vv v16, v12, v16
+; RV32I-NEXT:    vadd.vv v20, v16, v16
+; RV32I-NEXT:    vxor.vv v20, v16, v20
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsub.vx v24, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v24
+; RV32I-NEXT:    vsrl.vi v24, v8, 1
+; RV32I-NEXT:    vand.vv v20, v24, v20
+; RV32I-NEXT:    vsub.vv v8, v8, v20
+; RV32I-NEXT:    vand.vv v20, v8, v16
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vadd.vv v8, v20, v8
+; RV32I-NEXT:    vsrl.vi v16, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v16
 ; RV32I-NEXT:    vand.vv v8, v8, v12
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
@@ -1624,35 +1603,48 @@ declare <vscale x 4 x i64> @llvm.cttz.nxv4i64(<vscale x 4 x i64>, i1)
 define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-LABEL: cttz_nxv8i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vsub.vx v16, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v24, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v16, v16, v24
-; RV32I-NEXT:    vsub.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v16, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v24, v8, v16
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v16
-; RV32I-NEXT:    vadd.vv v8, v24, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v16
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    sub sp, sp, a0
+; RV32I-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
 ; RV32I-NEXT:    vmv.v.x v16, a0
+; RV32I-NEXT:    addi a0, sp, 16
+; RV32I-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT:    vsll.vi v24, v16, 2
+; RV32I-NEXT:    vxor.vv v24, v16, v24
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsub.vx v0, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v0
+; RV32I-NEXT:    vadd.vv v0, v24, v24
+; RV32I-NEXT:    vxor.vv v0, v24, v0
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    addi a0, a0, 16
+; RV32I-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT:    vsrl.vi v0, v8, 1
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    addi a0, a0, 16
+; RV32I-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT:    vand.vv v0, v0, v16
+; RV32I-NEXT:    vsub.vv v8, v8, v0
+; RV32I-NEXT:    vand.vv v0, v8, v24
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v24
+; RV32I-NEXT:    vadd.vv v8, v0, v8
+; RV32I-NEXT:    vsrl.vi v24, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v24
+; RV32I-NEXT:    addi a0, sp, 16
+; RV32I-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32I-NEXT:    vand.vv v8, v8, v16
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
@@ -1662,6 +1654,10 @@ define <vscale x 8 x i64> @cttz_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-NEXT:    vmul.vv v8, v8, v16
 ; RV32I-NEXT:    li a0, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a0
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: cttz_nxv8i64:
@@ -2813,35 +2809,28 @@ define <vscale x 16 x i32> @cttz_zero_undef_nxv16i32(<vscale x 16 x i32> %va) {
 define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 ; RV32I-LABEL: cttz_zero_undef_nxv1i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vsub.vx v9, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v9, v9, v10
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v10, v8, v9
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m1, ta, ma
 ; RV32I-NEXT:    vmv.v.x v9, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m1, ta, ma
+; RV32I-NEXT:    vsll.vi v10, v9, 2
+; RV32I-NEXT:    vxor.vv v10, v9, v10
+; RV32I-NEXT:    vadd.vv v11, v10, v10
+; RV32I-NEXT:    vxor.vv v11, v10, v11
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsub.vx v12, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vsrl.vi v12, v8, 1
+; RV32I-NEXT:    vand.vv v11, v12, v11
+; RV32I-NEXT:    vsub.vv v8, v8, v11
+; RV32I-NEXT:    vand.vv v11, v8, v10
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vadd.vv v8, v11, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    vand.vv v8, v8, v9
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
@@ -2933,35 +2922,28 @@ define <vscale x 1 x i64> @cttz_zero_undef_nxv1i64(<vscale x 1 x i64> %va) {
 define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 ; RV32I-LABEL: cttz_zero_undef_nxv2i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vsub.vx v10, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v10, v10, v12
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v12, v8, v10
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m2, ta, ma
 ; RV32I-NEXT:    vmv.v.x v10, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m2, ta, ma
+; RV32I-NEXT:    vsll.vi v12, v10, 2
+; RV32I-NEXT:    vxor.vv v12, v10, v12
+; RV32I-NEXT:    vadd.vv v14, v12, v12
+; RV32I-NEXT:    vxor.vv v14, v12, v14
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsub.vx v16, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    vand.vv v14, v16, v14
+; RV32I-NEXT:    vsub.vv v8, v8, v14
+; RV32I-NEXT:    vand.vv v14, v8, v12
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vadd.vv v8, v14, v8
+; RV32I-NEXT:    vsrl.vi v12, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v12
 ; RV32I-NEXT:    vand.vv v8, v8, v10
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
@@ -3053,35 +3035,28 @@ define <vscale x 2 x i64> @cttz_zero_undef_nxv2i64(<vscale x 2 x i64> %va) {
 define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 ; RV32I-LABEL: cttz_zero_undef_nxv4i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vsub.vx v12, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vsrl.vi v12, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vmv.v.x v16, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v12, v12, v16
-; RV32I-NEXT:    vsub.vv v8, v8, v12
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
-; RV32I-NEXT:    vand.vv v16, v8, v12
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v12
-; RV32I-NEXT:    vadd.vv v8, v16, v8
-; RV32I-NEXT:    vsrl.vi v12, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v12
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m4, ta, ma
 ; RV32I-NEXT:    vmv.v.x v12, a0
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m4, ta, ma
+; RV32I-NEXT:    vsll.vi v16, v12, 2
+; RV32I-NEXT:    vxor.vv v16, v12, v16
+; RV32I-NEXT:    vadd.vv v20, v16, v16
+; RV32I-NEXT:    vxor.vv v20, v16, v20
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsub.vx v24, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v24
+; RV32I-NEXT:    vsrl.vi v24, v8, 1
+; RV32I-NEXT:    vand.vv v20, v24, v20
+; RV32I-NEXT:    vsub.vv v8, v8, v20
+; RV32I-NEXT:    vand.vv v20, v8, v16
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vadd.vv v8, v20, v8
+; RV32I-NEXT:    vsrl.vi v16, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v16
 ; RV32I-NEXT:    vand.vv v8, v8, v12
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
@@ -3173,35 +3148,48 @@ define <vscale x 4 x i64> @cttz_zero_undef_nxv4i64(<vscale x 4 x i64> %va) {
 define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-LABEL: cttz_zero_undef_nxv8i64:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    li a0, 1
-; RV32I-NEXT:    vsetvli a1, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vsub.vx v16, v8, a0
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v16
-; RV32I-NEXT:    vsrl.vi v16, v8, 1
-; RV32I-NEXT:    lui a0, 349525
-; RV32I-NEXT:    addi a0, a0, 1365
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v24, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v16, v16, v24
-; RV32I-NEXT:    vsub.vv v8, v8, v16
-; RV32I-NEXT:    lui a0, 209715
-; RV32I-NEXT:    addi a0, a0, 819
-; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
-; RV32I-NEXT:    vmv.v.x v16, a0
-; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
-; RV32I-NEXT:    vand.vv v24, v8, v16
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v16
-; RV32I-NEXT:    vadd.vv v8, v24, v8
-; RV32I-NEXT:    vsrl.vi v16, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v16
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    .cfi_def_cfa_offset 16
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    sub sp, sp, a0
+; RV32I-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32I-NEXT:    lui a0, 61681
 ; RV32I-NEXT:    addi a0, a0, -241
 ; RV32I-NEXT:    vsetvli a1, zero, e32, m8, ta, ma
 ; RV32I-NEXT:    vmv.v.x v16, a0
+; RV32I-NEXT:    addi a0, sp, 16
+; RV32I-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32I-NEXT:    vsetvli a0, zero, e64, m8, ta, ma
+; RV32I-NEXT:    vsll.vi v24, v16, 2
+; RV32I-NEXT:    vxor.vv v24, v16, v24
+; RV32I-NEXT:    li a0, 1
+; RV32I-NEXT:    vsub.vx v0, v8, a0
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v0
+; RV32I-NEXT:    vadd.vv v0, v24, v24
+; RV32I-NEXT:    vxor.vv v0, v24, v0
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    addi a0, a0, 16
+; RV32I-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32I-NEXT:    vsrl.vi v0, v8, 1
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 3
+; RV32I-NEXT:    add a0, sp, a0
+; RV32I-NEXT:    addi a0, a0, 16
+; RV32I-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32I-NEXT:    vand.vv v0, v0, v16
+; RV32I-NEXT:    vsub.vv v8, v8, v0
+; RV32I-NEXT:    vand.vv v0, v8, v24
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v24
+; RV32I-NEXT:    vadd.vv v8, v0, v8
+; RV32I-NEXT:    vsrl.vi v24, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v24
+; RV32I-NEXT:    addi a0, sp, 16
+; RV32I-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32I-NEXT:    vand.vv v8, v8, v16
 ; RV32I-NEXT:    lui a0, 4112
 ; RV32I-NEXT:    addi a0, a0, 257
@@ -3211,6 +3199,10 @@ define <vscale x 8 x i64> @cttz_zero_undef_nxv8i64(<vscale x 8 x i64> %va) {
 ; RV32I-NEXT:    vmul.vv v8, v8, v16
 ; RV32I-NEXT:    li a0, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a0
+; RV32I-NEXT:    csrr a0, vlenb
+; RV32I-NEXT:    slli a0, a0, 4
+; RV32I-NEXT:    add sp, sp, a0
+; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: cttz_zero_undef_nxv8i64:
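(Aside, not part of the patch: the cttz checks above and below reuse the same popcount body; only the vsub.vx 1 / vnot.v / vand.vv prologue differs. A minimal scalar check of that reduction, again plain C++ outside the patch:)

  #include <cassert>
  #include <cstdint>

  // Popcount via the same mask derivation as in the earlier sketch.
  static unsigned popcount64(uint64_t x) {
    const uint64_t F = 0x0F0F0F0F0F0F0F0FULL;
    const uint64_t C33 = F ^ (F << 2);
    const uint64_t C55 = C33 ^ (C33 << 1);
    const uint64_t C01 = F & (F >> 3);
    x -= (x >> 1) & C55;
    x = (x & C33) + ((x >> 2) & C33);
    x = (x + (x >> 4)) & F;
    return static_cast<unsigned>((x * C01) >> 56);
  }

  int main() {
    // cttz(x) = popcount(~x & (x - 1)) for x != 0; this is what the
    // vsub.vx/vnot.v/vand.vv prologue computes before the popcount body.
    const uint64_t vals[] = {1ULL, 2ULL, 0x80ULL, 0xF000ULL, 0x8000000000000000ULL};
    for (uint64_t x : vals) {
      const unsigned tz = popcount64(~x & (x - 1));
      assert(tz == static_cast<unsigned>(__builtin_ctzll(x)));
    }
    return 0;
  }
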
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 145ce6e917f962..071b76899e7523 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -1365,41 +1365,31 @@ declare <vscale x 1 x i64> @llvm.vp.cttz.nxv1i64(<vscale x 1 x i64>, i1 immarg,
 define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv1i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV32-NEXT:    vxor.vv v10, v9, v10, v0.t
+; RV32-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV32-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v12, v8, a0, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV32-NEXT:    vand.vv v11, v8, v10, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3, v0.t
+; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -1407,38 +1397,34 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x
 ;
 ; RV64-LABEL: vp_cttz_nxv1i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV64-NEXT:    vxor.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV64-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v12, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV64-NEXT:    vand.vv v11, v8, v10, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v9, v9, 3, v0.t
+; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1455,41 +1441,31 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1 x
 define <vscale x 1 x i64> @vp_cttz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv1i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2
+; RV32-NEXT:    vxor.vv v10, v9, v10
+; RV32-NEXT:    vadd.vv v11, v10, v10
+; RV32-NEXT:    vxor.vv v11, v10, v11
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v12, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vv v11, v12, v11
+; RV32-NEXT:    vsub.vv v8, v8, v11
+; RV32-NEXT:    vand.vv v11, v8, v10
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vadd.vv v8, v11, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3
+; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1497,38 +1473,34 @@ define <vscale x 1 x i64> @vp_cttz_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
 ;
 ; RV64-LABEL: vp_cttz_nxv1i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
+; RV64-NEXT:    vsll.vi v10, v9, 2
+; RV64-NEXT:    vxor.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v11, v10, v10
+; RV64-NEXT:    vxor.vv v11, v10, v11
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v12, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vv v11, v12, v11
+; RV64-NEXT:    vsub.vv v8, v8, v11
+; RV64-NEXT:    vand.vv v11, v8, v10
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v10
+; RV64-NEXT:    vadd.vv v8, v11, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v10
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v9, v9, 3
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1549,41 +1521,31 @@ declare <vscale x 2 x i64> @llvm.vp.cttz.nxv2i64(<vscale x 2 x i64>, i1 immarg,
 define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV32-NEXT:    vxor.vv v12, v10, v12, v0.t
+; RV32-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV32-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV32-NEXT:    vand.vv v14, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3, v0.t
+; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -1591,38 +1553,34 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x
 ;
 ; RV64-LABEL: vp_cttz_nxv2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV64-NEXT:    vxor.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV64-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV64-NEXT:    vand.vv v14, v8, v12, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v10, v10, 3, v0.t
+; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1639,41 +1597,31 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2 x
 define <vscale x 2 x i64> @vp_cttz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2
+; RV32-NEXT:    vxor.vv v12, v10, v12
+; RV32-NEXT:    vadd.vv v14, v12, v12
+; RV32-NEXT:    vxor.vv v14, v12, v14
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vand.vv v14, v16, v14
+; RV32-NEXT:    vsub.vv v8, v8, v14
+; RV32-NEXT:    vand.vv v14, v8, v12
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vadd.vv v8, v14, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3
+; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1681,38 +1629,34 @@ define <vscale x 2 x i64> @vp_cttz_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
 ;
 ; RV64-LABEL: vp_cttz_nxv2i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
+; RV64-NEXT:    vsll.vi v12, v10, 2
+; RV64-NEXT:    vxor.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v14, v12, v12
+; RV64-NEXT:    vxor.vv v14, v12, v14
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v16, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vv v14, v16, v14
+; RV64-NEXT:    vsub.vv v8, v8, v14
+; RV64-NEXT:    vand.vv v14, v8, v12
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v12
+; RV64-NEXT:    vadd.vv v8, v14, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v12
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v10, v10, 3
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v8, v8, v10
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1733,41 +1677,31 @@ declare <vscale x 4 x i64> @llvm.vp.cttz.nxv4i64(<vscale x 4 x i64>, i1 immarg,
 define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV32-NEXT:    vxor.vv v16, v12, v16, v0.t
+; RV32-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV32-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v24, v8, a0, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV32-NEXT:    vand.vv v20, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3, v0.t
+; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -1775,38 +1709,34 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x
 ;
 ; RV64-LABEL: vp_cttz_nxv4i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v24, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV64-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV64-NEXT:    vand.vv v20, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v12, v12, 3, v0.t
+; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
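
[Editor's note, not part of the patch] For reference, the new RV64 sequence above materializes only the 0x0F0F... constant (lui 61681 / addiw -241 / slli+add) and derives the remaining masks from it: the vsll.vi/vxor steps rebuild the 0x3333... and 0x5555... masks, and the vsrl.vi 3 / vand step rebuilds the 0x0101... multiplier. A minimal scalar C++ sketch of those identities (standalone illustration only; the names are made up):

    #include <cassert>
    #include <cstdint>

    int main() {
      // The only constant the new code materializes.
      const uint64_t m0F = 0x0F0F0F0F0F0F0F0FULL;
      // vsll.vi 2 + vxor: rebuilds 0x3333333333333333
      const uint64_t m33 = m0F ^ (m0F << 2);
      // vsll.vi 1 + vxor (or vadd.vv x,x in the unmasked variants): 0x5555...
      const uint64_t m55 = m33 ^ (m33 << 1);
      // vsrl.vi 3 + vand: the 0x0101... multiplier for the final byte sum
      const uint64_t m01 = m0F & (m0F >> 3);
      assert(m33 == 0x3333333333333333ULL);
      assert(m55 == 0x5555555555555555ULL);
      assert(m01 == 0x0101010101010101ULL);
      return 0;
    }
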
@@ -1823,41 +1753,31 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4 x
 define <vscale x 4 x i64> @vp_cttz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2
+; RV32-NEXT:    vxor.vv v16, v12, v16
+; RV32-NEXT:    vadd.vv v20, v16, v16
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v24, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 1
+; RV32-NEXT:    vand.vv v20, v24, v20
+; RV32-NEXT:    vsub.vv v8, v8, v20
+; RV32-NEXT:    vand.vv v20, v8, v16
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vadd.vv v8, v20, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3
+; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1865,38 +1785,34 @@ define <vscale x 4 x i64> @vp_cttz_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
 ;
 ; RV64-LABEL: vp_cttz_nxv4i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1
+; RV64-NEXT:    vsll.vi v16, v12, 2
+; RV64-NEXT:    vxor.vx v16, v16, a1
+; RV64-NEXT:    vadd.vv v20, v16, v16
+; RV64-NEXT:    vxor.vv v20, v16, v20
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v24, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
+; RV64-NEXT:    vsrl.vi v24, v8, 1
+; RV64-NEXT:    vand.vv v20, v24, v20
+; RV64-NEXT:    vsub.vv v8, v8, v20
+; RV64-NEXT:    vand.vv v20, v8, v16
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vadd.vv v8, v20, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v12, v12, 3
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vmul.vv v8, v8, v12
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
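
[Editor's note, not part of the patch] The unmasked variants above compute the doubling with vadd.vv v20, v16, v16 instead of vsll.vi ..., 1; the two are interchangeable here since x + x == x << 1. A one-line sanity check (illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      // vadd.vv x, x and vsll.vi x, 1 produce the same doubled mask.
      const uint64_t m33 = 0x3333333333333333ULL;
      assert((m33 + m33) == (m33 << 1));
      return 0;
    }
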
@@ -1917,82 +1833,182 @@ declare <vscale x 7 x i64> @llvm.vp.cttz.nxv7i64(<vscale x 7 x i64>, i1 immarg,
 define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv7i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v8, a1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV32-NEXT:    vsll.vi v24, v8, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v8, v24, v0.t
+; RV32-NEXT:    vsll.vi v8, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v8, v24, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_nxv7i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v24, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v24, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64:
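
[Editor's note, not part of the patch] In the m8 cases (nxv7i64, nxv8i64, nxv16i64) above and in the following hunks, keeping the derived masks live alongside the source operands exceeds the vector register budget, so the updated output spills with vs8r.v/vl8r.v and reserves a scalable stack frame; the .cfi_escape comments in the diff record it as sp + 16 + N * vlenb. A rough model of those frame sizes, assuming VLEN=128 (vlenb = 16) purely for illustration:

    #include <cstdio>

    int main() {
      // Hypothetical vlenb for VLEN=128; real hardware may differ.
      const unsigned vlenb = 16;
      // Masked m8 variants: ".cfi_escape ... # sp + 16 + 32 * vlenb"
      const unsigned masked_frame = 16 + 32 * vlenb;
      // RV32 unmasked m8 variant: "# sp + 16 + 16 * vlenb"
      const unsigned unmasked_frame = 16 + 16 * vlenb;
      std::printf("masked: %u bytes, unmasked: %u bytes\n", masked_frame,
                  unmasked_frame);
      return 0;
    }
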
@@ -2007,82 +2023,106 @@ define <vscale x 7 x i64> @vp_cttz_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7 x
 define <vscale x 7 x i64> @vp_cttz_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv7i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v0, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_nxv7i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v0, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv7i64_unmasked:
@@ -2101,82 +2141,182 @@ declare <vscale x 8 x i64> @llvm.vp.cttz.nxv8i64(<vscale x 8 x i64>, i1 immarg,
 define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
+; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v8, a1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV32-NEXT:    vsll.vi v24, v8, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v8, v24, v0.t
+; RV32-NEXT:    vsll.vi v8, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v8, v24, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_nxv8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v24, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v24, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64:
@@ -2191,82 +2331,106 @@ define <vscale x 8 x i64> @vp_cttz_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8 x
 define <vscale x 8 x i64> @vp_cttz_nxv8i64_unmasked(<vscale x 8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_nxv8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v24, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
 ; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v0, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetvli a2, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_nxv8i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetvli a2, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v0, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv8i64_unmasked:
@@ -2288,13 +2452,19 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 56 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV32-NEXT:    vmv1r.v v24, v0
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -2305,120 +2475,114 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV32-NEXT:    sub a2, a0, a1
 ; RV32-NEXT:    sltu a3, a0, a2
 ; RV32-NEXT:    addi a3, a3, -1
-; RV32-NEXT:    and a3, a3, a2
+; RV32-NEXT:    and a2, a3, a2
+; RV32-NEXT:    lui a3, 61681
+; RV32-NEXT:    addi a3, a3, -241
+; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v16, a3
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 40
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a2, 1
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v8, v16, a2, v0.t
-; RV32-NEXT:    vnot.v v16, v16, v0.t
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 40
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 48
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    lui a4, 349525
-; RV32-NEXT:    addi a4, a4, 1365
-; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a4
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 24
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 48
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v16, v8, a2, v0.t
+; RV32-NEXT:    addi a3, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vnot.v v16, v8, v0.t
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 40
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 40
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    lui a4, 209715
-; RV32-NEXT:    addi a4, a4, 819
-; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a4
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 40
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 3
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 40
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 5
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 48
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 5
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT:    lui a4, 61681
-; RV32-NEXT:    addi a4, a4, -241
-; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a4
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    lui a4, 4112
-; RV32-NEXT:    addi a4, a4, 257
-; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a4
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    li a4, 40
+; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 3
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v8, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 5
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a3, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT:    addi a4, sp, 16
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 4
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    bltu a0, a1, .LBB46_2
 ; RV32-NEXT:  # %bb.1:
@@ -2427,107 +2591,113 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vx v8, v16, a2, v0.t
-; RV32-NEXT:    vnot.v v16, v16, v0.t
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v8, v16, a2, v0.t
+; RV32-NEXT:    addi a0, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vnot.v v8, v16, v0.t
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vx v8, v8, a3, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vx v8, v8, a3, v0.t
-; RV32-NEXT:    addi a0, sp, 16
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    li a1, 48
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    addi sp, sp, 16
@@ -2538,89 +2708,235 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64(<vscale x 16 x i64> %va, <vscale x
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV64-NEXT:    vmv1r.v v24, v0
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    slli a1, a1, 4
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
-; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    srli a2, a1, 3
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    srli a1, a2, 3
 ; RV64-NEXT:    vsetvli a3, zero, e8, mf4, ta, ma
-; RV64-NEXT:    vslidedown.vx v0, v0, a2
-; RV64-NEXT:    sub a2, a0, a1
-; RV64-NEXT:    sltu a3, a0, a2
+; RV64-NEXT:    vslidedown.vx v0, v0, a1
+; RV64-NEXT:    sub a1, a0, a2
+; RV64-NEXT:    sltu a3, a0, a1
 ; RV64-NEXT:    addi a3, a3, -1
-; RV64-NEXT:    and a3, a3, a2
-; RV64-NEXT:    li a2, 1
+; RV64-NEXT:    and a3, a3, a1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a4, a1, 32
+; RV64-NEXT:    add a1, a1, a4
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    li a5, 40
+; RV64-NEXT:    mul a4, a4, a5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v8, v16, a2, v0.t
-; RV64-NEXT:    vnot.v v16, v16, v0.t
-; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV64-NEXT:    vsrl.vi v8, v16, 1, v0.t
-; RV64-NEXT:    lui a3, 349525
-; RV64-NEXT:    addiw a3, a3, 1365
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    vsub.vv v16, v16, v8, v0.t
-; RV64-NEXT:    lui a4, 209715
-; RV64-NEXT:    addiw a4, a4, 819
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v16, a4, v0.t
-; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    li a3, 1
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v8, v16, a3, v0.t
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    vnot.v v8, v16, v0.t
+; RV64-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 3
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8r.v v8, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a5, 61681
-; RV64-NEXT:    addiw a5, a5, -241
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
-; RV64-NEXT:    lui a6, 4112
-; RV64-NEXT:    addiw a6, a6, 257
-; RV64-NEXT:    slli a7, a6, 32
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT:    li a7, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT:    addi t0, sp, 16
-; RV64-NEXT:    vs8r.v v8, (t0) # Unknown-size Folded Spill
-; RV64-NEXT:    bltu a0, a1, .LBB46_2
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    li a5, 40
+; RV64-NEXT:    mul a4, a4, a5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    li a4, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a4, v0.t
+; RV64-NEXT:    csrr a5, vlenb
+; RV64-NEXT:    slli a5, a5, 4
+; RV64-NEXT:    add a5, sp, a5
+; RV64-NEXT:    addi a5, a5, 16
+; RV64-NEXT:    vs8r.v v8, (a5) # Unknown-size Folded Spill
+; RV64-NEXT:    bltu a0, a2, .LBB46_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    mv a0, a2
 ; RV64-NEXT:  .LBB46_2:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
 ; RV64-NEXT:    add a0, sp, a0
 ; RV64-NEXT:    addi a0, a0, 16
 ; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsub.vx v16, v8, a2, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v8, v16, a3, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vnot.v v8, v16, v0.t
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a3, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a4, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a5, v0.t
-; RV64-NEXT:    vmul.vx v8, v8, a6, v0.t
-; RV64-NEXT:    vsrl.vx v8, v8, a7, v0.t
-; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
 ; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vx v8, v8, a4, v0.t
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 48
+; RV64-NEXT:    mul a0, a0, a1
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
@@ -2656,12 +2972,12 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
 ; RV32-NEXT:    addi sp, sp, -16
 ; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    li a2, 24
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 40 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    slli a1, a1, 4
 ; RV32-NEXT:    add a1, sp, a1
 ; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
@@ -2669,62 +2985,53 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
 ; RV32-NEXT:    sub a2, a0, a1
 ; RV32-NEXT:    sltu a3, a0, a2
 ; RV32-NEXT:    addi a3, a3, -1
-; RV32-NEXT:    and a3, a3, a2
+; RV32-NEXT:    and a2, a3, a2
+; RV32-NEXT:    lui a3, 61681
+; RV32-NEXT:    addi a3, a3, -241
+; RV32-NEXT:    vsetvli a4, zero, e32, m8, ta, ma
+; RV32-NEXT:    vmv.v.x v24, a3
+; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV32-NEXT:    li a2, 1
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v8, v16, a2
+; RV32-NEXT:    vsub.vx v0, v16, a2
 ; RV32-NEXT:    vnot.v v16, v16
-; RV32-NEXT:    vand.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 1
-; RV32-NEXT:    lui a4, 349525
-; RV32-NEXT:    addi a4, a4, 1365
-; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a4
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    li a5, 24
-; RV32-NEXT:    mul a4, a4, a5
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vsub.vv v8, v8, v24
-; RV32-NEXT:    lui a4, 209715
-; RV32-NEXT:    addi a4, a4, 819
-; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v0, a4
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v24
-; RV32-NEXT:    lui a4, 61681
-; RV32-NEXT:    addi a4, a4, -241
-; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a4
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV32-NEXT:    csrr a4, vlenb
-; RV32-NEXT:    slli a4, a4, 4
-; RV32-NEXT:    add a4, sp, a4
-; RV32-NEXT:    addi a4, a4, 16
-; RV32-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    lui a4, 4112
-; RV32-NEXT:    addi a4, a4, 257
-; RV32-NEXT:    vsetvli a5, zero, e32, m8, ta, ma
-; RV32-NEXT:    vmv.v.x v8, a4
-; RV32-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v16, v0
+; RV32-NEXT:    addi a3, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v16, v0, v0
+; RV32-NEXT:    vxor.vv v16, v0, v16
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
 ; RV32-NEXT:    addi a3, a3, 16
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vmul.vv v16, v16, v8
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    csrr a3, vlenb
+; RV32-NEXT:    slli a3, a3, 3
+; RV32-NEXT:    add a3, sp, a3
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    addi a3, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vadd.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
+; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a3, 56
-; RV32-NEXT:    vsrl.vx v8, v16, a3
-; RV32-NEXT:    addi a4, sp, 16
+; RV32-NEXT:    vsrl.vx v8, v8, a3
+; RV32-NEXT:    csrr a4, vlenb
+; RV32-NEXT:    slli a4, a4, 3
+; RV32-NEXT:    add a4, sp, a4
+; RV32-NEXT:    addi a4, a4, 16
 ; RV32-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
 ; RV32-NEXT:    bltu a0, a1, .LBB47_2
 ; RV32-NEXT:  # %bb.1:
@@ -2732,45 +3039,52 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
 ; RV32-NEXT:  .LBB47_2:
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vx v16, v24, a2
-; RV32-NEXT:    vnot.v v24, v24
-; RV32-NEXT:    vand.vv v16, v24, v16
-; RV32-NEXT:    vsrl.vi v24, v16, 1
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v8, v16, a2
+; RV32-NEXT:    vnot.v v0, v16
+; RV32-NEXT:    vand.vv v16, v0, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v8, v0, v0
+; RV32-NEXT:    vxor.vv v8, v0, v8
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 24
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v8
-; RV32-NEXT:    vsub.vv v16, v16, v24
-; RV32-NEXT:    vand.vv v24, v16, v0
-; RV32-NEXT:    vsrl.vi v16, v16, 2
-; RV32-NEXT:    vand.vv v16, v16, v0
-; RV32-NEXT:    vadd.vv v16, v24, v16
-; RV32-NEXT:    vsrl.vi v24, v16, 4
-; RV32-NEXT:    vadd.vv v16, v16, v24
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v16, 1
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v16, v8
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vadd.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
+; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vx v8, v8, a3
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
 ; RV32-NEXT:    addi a0, a0, 16
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmul.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vx v8, v8, a3
-; RV32-NEXT:    addi a0, sp, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
 ; RV32-NEXT:    addi sp, sp, 16
@@ -2778,65 +3092,109 @@ define <vscale x 16 x i64> @vp_cttz_nxv16i64_unmasked(<vscale x 16 x i64> %va, i
 ;
 ; RV64-LABEL: vp_cttz_nxv16i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    sub a2, a0, a1
-; RV64-NEXT:    sltu a3, a0, a2
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    sub a1, a0, a2
+; RV64-NEXT:    sltu a3, a0, a1
 ; RV64-NEXT:    addi a3, a3, -1
-; RV64-NEXT:    and a3, a3, a2
-; RV64-NEXT:    li a2, 1
+; RV64-NEXT:    and a3, a3, a1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a4, a1, 32
+; RV64-NEXT:    add a1, a1, a4
+; RV64-NEXT:    vsetvli a4, zero, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v24, v16, a2
+; RV64-NEXT:    li a3, 1
+; RV64-NEXT:    vsub.vx v0, v16, a3
 ; RV64-NEXT:    vnot.v v16, v16
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    lui a3, 349525
-; RV64-NEXT:    addiw a3, a3, 1365
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v24, v24, a3
-; RV64-NEXT:    vsub.vv v16, v16, v24
-; RV64-NEXT:    lui a4, 209715
-; RV64-NEXT:    addiw a4, a4, 819
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v24, v16, a4
-; RV64-NEXT:    vsrl.vi v16, v16, 2
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vadd.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    lui a5, 61681
-; RV64-NEXT:    addiw a5, a5, -241
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vand.vx v16, v16, a5
-; RV64-NEXT:    lui a6, 4112
-; RV64-NEXT:    addiw a6, a6, 257
-; RV64-NEXT:    slli a7, a6, 32
-; RV64-NEXT:    add a6, a6, a7
-; RV64-NEXT:    vmul.vx v16, v16, a6
-; RV64-NEXT:    li a7, 56
-; RV64-NEXT:    vsrl.vx v16, v16, a7
-; RV64-NEXT:    bltu a0, a1, .LBB47_2
+; RV64-NEXT:    vand.vv v16, v16, v0
+; RV64-NEXT:    vsll.vi v0, v8, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    vsrl.vi v8, v16, 1
+; RV64-NEXT:    vand.vv v8, v8, v24
+; RV64-NEXT:    vsub.vv v8, v16, v8
+; RV64-NEXT:    vand.vv v16, v8, v0
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vl8r.v v16, (a4) # Unknown-size Folded Reload
+; RV64-NEXT:    vmv8r.v v0, v16
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    li a4, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a4
+; RV64-NEXT:    addi a5, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a5) # Unknown-size Folded Spill
+; RV64-NEXT:    bltu a0, a2, .LBB47_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    mv a0, a1
+; RV64-NEXT:    mv a0, a2
 ; RV64-NEXT:  .LBB47_2:
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v24, v8, a2
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    vand.vx v24, v24, a3
-; RV64-NEXT:    vsub.vv v8, v8, v24
-; RV64-NEXT:    vand.vx v24, v8, a4
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v8, v16, a3
+; RV64-NEXT:    vnot.v v24, v16
+; RV64-NEXT:    vand.vv v8, v24, v8
+; RV64-NEXT:    vsll.vi v24, v0, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v16, v24, v0
+; RV64-NEXT:    vsrl.vi v0, v8, 1
+; RV64-NEXT:    vand.vv v16, v0, v16
+; RV64-NEXT:    vsub.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    vadd.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v24
-; RV64-NEXT:    vand.vx v8, v8, a5
-; RV64-NEXT:    vmul.vx v8, v8, a6
-; RV64-NEXT:    vsrl.vx v8, v8, a7
+; RV64-NEXT:    vand.vv v8, v8, v24
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vx v8, v8, a4
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
 ;
 ; CHECK-ZVBB-LABEL: vp_cttz_nxv16i64_unmasked:
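A note on the new check lines above: rather than materializing each 64-bit splat
constant separately, the expanded sequence now derives the 0x33..., 0x55... and
0x0101... masks from the single 0x0F... splat with a shift plus an xor/and, which
is what the added vsll.vi/vsrl.vi/vxor/vand instructions compute. A minimal C++
sketch of those identities (illustrative only, not part of the patch; scalar
values stand in for the splatted vector operands):

#include <cstdint>

// The only constant still materialized: a splat of 0x0F in every byte.
constexpr uint64_t Mask0F = 0x0F0F0F0F0F0F0F0FULL;
// 0x33... = 0x0F... ^ (0x0F... << 2)   -> vsll.vi 2 + vxor in the checks.
constexpr uint64_t Mask33 = Mask0F ^ (Mask0F << 2);
// 0x55... = 0x33... ^ (0x33... << 1)   -> vsll.vi 1 (or vadd.vv x,x) + vxor.
constexpr uint64_t Mask55 = Mask33 ^ (Mask33 << 1);
// 0x0101... = (0x0F... >> 3) & 0x0F... -> vsrl.vi 3 + vand.
constexpr uint64_t Mask01 = (Mask0F >> 3) & Mask0F;

static_assert(Mask33 == 0x3333333333333333ULL, "0x33 splat");
static_assert(Mask55 == 0x5555555555555555ULL, "0x55 splat");
static_assert(Mask01 == 0x0101010101010101ULL, "0x01 splat");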
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
index 36f22bd3259cf9..5654be9d1cb4a6 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll
@@ -913,35 +913,26 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
+; RV32-NEXT:    vsll.vi v11, v10, 2, v0.t
+; RV32-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV32-NEXT:    vsll.vi v12, v11, 1, v0.t
+; RV32-NEXT:    vxor.vv v12, v11, v12, v0.t
+; RV32-NEXT:    vand.vv v9, v9, v12, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
+; RV32-NEXT:    vand.vv v9, v8, v11, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v11, v0.t
+; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    vsrl.vi v9, v10, 3, v0.t
+; RV32-NEXT:    vand.vv v9, v10, v9, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -960,37 +951,34 @@ define <2 x i64> @vp_ctlz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v9, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v9, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vsll.vi v11, v10, 2, v0.t
+; RV64-NEXT:    vxor.vx v11, v11, a1, v0.t
+; RV64-NEXT:    vsll.vi v12, v11, 1, v0.t
+; RV64-NEXT:    vxor.vv v12, v11, v12, v0.t
+; RV64-NEXT:    vand.vv v9, v9, v12, v0.t
 ; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v9, v8, v11, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v11, v0.t
 ; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v9, v10, 3, v0.t
+; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1017,35 +1005,26 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v9
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vsll.vi v11, v10, 2
+; RV32-NEXT:    vxor.vv v11, v10, v11
+; RV32-NEXT:    vadd.vv v12, v11, v11
+; RV32-NEXT:    vxor.vv v12, v11, v12
+; RV32-NEXT:    vand.vv v9, v9, v12
 ; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
+; RV32-NEXT:    vand.vv v9, v8, v11
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
+; RV32-NEXT:    vand.vv v8, v8, v11
+; RV32-NEXT:    vadd.vv v8, v9, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v9, v10, 3
+; RV32-NEXT:    vand.vv v9, v10, v9
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1064,37 +1043,34 @@ define <2 x i64> @vp_ctlz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    vsrl.vi v9, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v9, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v9, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    vnot.v v8, v8
 ; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vsll.vi v11, v10, 2
+; RV64-NEXT:    vxor.vx v11, v11, a1
+; RV64-NEXT:    vadd.vv v12, v11, v11
+; RV64-NEXT:    vxor.vv v12, v11, v12
+; RV64-NEXT:    vand.vv v9, v9, v12
 ; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0
+; RV64-NEXT:    vand.vv v9, v8, v11
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v11
 ; RV64-NEXT:    vadd.vv v8, v9, v8
 ; RV64-NEXT:    vsrl.vi v9, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v9, v10, 3
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1125,35 +1101,26 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
+; RV32-NEXT:    vsll.vi v14, v12, 2, v0.t
+; RV32-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV32-NEXT:    vsll.vi v16, v14, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v14, v16, v0.t
+; RV32-NEXT:    vand.vv v10, v10, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
+; RV32-NEXT:    vand.vv v10, v8, v14, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v14, v0.t
+; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v10, v12, 3, v0.t
+; RV32-NEXT:    vand.vv v10, v12, v10, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -1172,37 +1139,34 @@ define <4 x i64> @vp_ctlz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vsrl.vi v10, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v10, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v10, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vsll.vi v14, v12, 2, v0.t
+; RV64-NEXT:    vxor.vx v14, v14, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v14, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v14, v16, v0.t
+; RV64-NEXT:    vand.vv v10, v10, v16, v0.t
 ; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v10, v8, v14, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v14, v0.t
 ; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v10, v12, 3, v0.t
+; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1229,35 +1193,26 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v10
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vsll.vi v14, v12, 2
+; RV32-NEXT:    vxor.vv v14, v12, v14
+; RV32-NEXT:    vadd.vv v16, v14, v14
+; RV32-NEXT:    vxor.vv v16, v14, v16
+; RV32-NEXT:    vand.vv v10, v10, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
+; RV32-NEXT:    vand.vv v10, v8, v14
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
+; RV32-NEXT:    vand.vv v8, v8, v14
+; RV32-NEXT:    vadd.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v10, v12, 3
+; RV32-NEXT:    vand.vv v10, v12, v10
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1276,37 +1231,34 @@ define <4 x i64> @vp_ctlz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vsrl.vi v10, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v10, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v10, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vnot.v v8, v8
 ; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vsll.vi v14, v12, 2
+; RV64-NEXT:    vxor.vx v14, v14, a1
+; RV64-NEXT:    vadd.vv v16, v14, v14
+; RV64-NEXT:    vxor.vv v16, v14, v16
+; RV64-NEXT:    vand.vv v10, v10, v16
 ; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vand.vv v10, v8, v14
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v14
 ; RV64-NEXT:    vadd.vv v8, v10, v8
 ; RV64-NEXT:    vsrl.vi v10, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v10, v12, 3
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v8, v8, v10
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1335,38 +1287,29 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-NEXT:    li a1, 32
 ; RV32-NEXT:    vsrl.vx v12, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vnot.v v12, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v12, 1, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vmv.v.x v8, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vsll.vi v20, v8, 2, v0.t
+; RV32-NEXT:    vxor.vv v20, v8, v20, v0.t
+; RV32-NEXT:    vsll.vi v24, v20, 1, v0.t
+; RV32-NEXT:    vxor.vv v24, v20, v24, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    vsub.vv v12, v12, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v12, v20, v0.t
+; RV32-NEXT:    vsrl.vi v12, v12, 2, v0.t
+; RV32-NEXT:    vand.vv v12, v12, v20, v0.t
+; RV32-NEXT:    vadd.vv v12, v16, v12, v0.t
+; RV32-NEXT:    vsrl.vi v16, v12, 4, v0.t
+; RV32-NEXT:    vadd.vv v12, v12, v16, v0.t
+; RV32-NEXT:    vand.vv v12, v12, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vmul.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV32-NEXT:    ret
@@ -1384,37 +1327,34 @@ define <8 x i64> @vp_ctlz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    vsrl.vi v12, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v12, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v12, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vnot.v v12, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v12, 1, v0.t
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vsll.vi v20, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v20, v20, a1, v0.t
+; RV64-NEXT:    vsll.vi v24, v20, 1, v0.t
+; RV64-NEXT:    vxor.vv v24, v20, v24, v0.t
+; RV64-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV64-NEXT:    vsub.vv v12, v12, v16, v0.t
+; RV64-NEXT:    vand.vv v16, v12, v20, v0.t
+; RV64-NEXT:    vsrl.vi v12, v12, 2, v0.t
+; RV64-NEXT:    vand.vv v12, v12, v20, v0.t
+; RV64-NEXT:    vadd.vv v12, v16, v12, v0.t
+; RV64-NEXT:    vsrl.vi v16, v12, 4, v0.t
+; RV64-NEXT:    vadd.vv v12, v12, v16, v0.t
+; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vsrl.vi v8, v8, 3, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1441,35 +1381,26 @@ define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vor.vv v8, v8, v12
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
+; RV32-NEXT:    vsll.vi v20, v16, 2
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    vadd.vv v24, v20, v20
+; RV32-NEXT:    vxor.vv v24, v20, v24
+; RV32-NEXT:    vand.vv v12, v12, v24
 ; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
+; RV32-NEXT:    vand.vv v12, v8, v20
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
+; RV32-NEXT:    vand.vv v8, v8, v20
+; RV32-NEXT:    vadd.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v12, v16, 3
+; RV32-NEXT:    vand.vv v12, v16, v12
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1488,37 +1419,34 @@ define <8 x i64> @vp_ctlz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    vsrl.vi v12, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v12, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v12, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    vnot.v v8, v8
 ; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vsll.vi v20, v16, 2
+; RV64-NEXT:    vxor.vx v20, v20, a1
+; RV64-NEXT:    vadd.vv v24, v20, v20
+; RV64-NEXT:    vxor.vv v24, v20, v24
+; RV64-NEXT:    vand.vv v12, v12, v24
 ; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0
+; RV64-NEXT:    vand.vv v12, v8, v20
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v20
 ; RV64-NEXT:    vadd.vv v8, v12, v8
 ; RV64-NEXT:    vsrl.vi v12, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v12, v16, 3
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vmul.vv v8, v8, v12
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1533,24 +1461,17 @@ declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32)
 define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v15i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
@@ -1566,40 +1487,79 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v24, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v24, v16, v0.t
+; RV32-NEXT:    vand.vv v24, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v24, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v15i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
@@ -1611,39 +1571,65 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v24, v16, v0.t
+; RV64-NEXT:    vand.vv v24, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vadd.vv v8, v24, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
   ret <15 x i64> %v
@@ -1652,24 +1638,16 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl
 define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v15i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v16
@@ -1685,40 +1663,57 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vsrl.vx v16, v8, a1
 ; RV32-NEXT:    vor.vv v8, v8, v16
 ; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
 ; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v8, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    vsll.vi v0, v8, 2
+; RV32-NEXT:    vxor.vv v0, v8, v0
+; RV32-NEXT:    vadd.vv v24, v0, v0
+; RV32-NEXT:    vxor.vv v24, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    vsrl.vi v16, v16, 2
+; RV32-NEXT:    vand.vv v16, v16, v0
+; RV32-NEXT:    vadd.vv v16, v24, v16
+; RV32-NEXT:    vsrl.vi v24, v16, 4
+; RV32-NEXT:    vadd.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 3
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vmul.vv v8, v16, v8
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v15i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v16
@@ -1730,39 +1725,54 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v16
 ; RV64-NEXT:    vsrl.vi v16, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vnot.v v16, v8
+; RV64-NEXT:    vsrl.vi v8, v16, 1
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v0, v8, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v24, v8, v24
+; RV64-NEXT:    vsub.vv v16, v16, v24
+; RV64-NEXT:    vand.vv v24, v16, v0
+; RV64-NEXT:    vsrl.vi v16, v16, 2
+; RV64-NEXT:    vand.vv v16, v16, v0
+; RV64-NEXT:    vadd.vv v16, v24, v16
+; RV64-NEXT:    vsrl.vi v24, v16, 4
+; RV64-NEXT:    vadd.vv v16, v16, v24
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v8, 3
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vmul.vv v8, v16, v8
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
@@ -1775,24 +1785,17 @@ declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
 define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
@@ -1808,40 +1811,79 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v24, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v24, v16, v0.t
+; RV32-NEXT:    vand.vv v24, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v24, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v16i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
@@ -1853,39 +1895,65 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v24, v16, v0.t
+; RV64-NEXT:    vand.vv v24, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vadd.vv v8, v24, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
   ret <16 x i64> %v
@@ -1894,24 +1962,16 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
 define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v16i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v16
@@ -1927,40 +1987,57 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV32-NEXT:    vsrl.vx v16, v8, a1
 ; RV32-NEXT:    vor.vv v8, v8, v16
 ; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
 ; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v8, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    vsll.vi v0, v8, 2
+; RV32-NEXT:    vxor.vv v0, v8, v0
+; RV32-NEXT:    vadd.vv v24, v0, v0
+; RV32-NEXT:    vxor.vv v24, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    vsrl.vi v16, v16, 2
+; RV32-NEXT:    vand.vv v16, v16, v0
+; RV32-NEXT:    vadd.vv v16, v24, v16
+; RV32-NEXT:    vsrl.vi v24, v16, 4
+; RV32-NEXT:    vadd.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 3
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vmul.vv v8, v16, v8
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v16i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v16
@@ -1972,39 +2049,54 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v16
 ; RV64-NEXT:    vsrl.vi v16, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vnot.v v16, v8
+; RV64-NEXT:    vsrl.vi v8, v16, 1
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v0, v8, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v24, v8, v24
+; RV64-NEXT:    vsub.vv v16, v16, v24
+; RV64-NEXT:    vand.vv v24, v16, v0
+; RV64-NEXT:    vsrl.vi v16, v16, 2
+; RV64-NEXT:    vand.vv v16, v16, v0
+; RV64-NEXT:    vadd.vv v16, v24, v16
+; RV64-NEXT:    vsrl.vi v24, v16, 4
+; RV64-NEXT:    vadd.vv v16, v16, v24
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v8, 3
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vmul.vv v8, v16, v8
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
@@ -2017,37 +2109,26 @@ declare <32 x i64> @llvm.vp.ctlz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)
 define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v32i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 48
+; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT:    vslidedown.vi v24, v0, 2
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    mv a2, a0
 ; RV32-NEXT:    bltu a0, a3, .LBB34_2
 ; RV32-NEXT:  # %bb.1:
@@ -2069,113 +2150,113 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 4
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 40
+; RV32-NEXT:    addi a3, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vlse64.v v16, (a3), zero
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    li a4, 40
 ; RV32-NEXT:    mul a3, a3, a4
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vxor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 3
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a2, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 4
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a3, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a3
@@ -2184,9 +2265,10 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a0, a0, a3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
@@ -2201,81 +2283,119 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vxor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    li a1, 48
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v32i64:
@@ -2283,23 +2403,25 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    li a1, 16
 ; RV64-NEXT:    vslidedown.vi v24, v0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB34_2
+; RV64-NEXT:    mv a3, a0
+; RV64-NEXT:    bltu a0, a1, .LBB34_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:  .LBB34_2:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
@@ -2310,81 +2432,238 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a2, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a2, 349525
-; RV64-NEXT:    addiw a2, a2, 1365
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a4, a1, 32
+; RV64-NEXT:    add a1, a1, a4
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    li a5, 40
+; RV64-NEXT:    mul a4, a4, a5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vxor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6, v0.t
-; RV64-NEXT:    addi a7, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
-; RV64-NEXT:    addi a7, a0, -16
-; RV64-NEXT:    sltu a0, a0, a7
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a4, a0, -16
+; RV64-NEXT:    sltu a0, a0, a4
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a7
+; RV64-NEXT:    and a0, a0, a4
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a0, a0, a4
 ; RV64-NEXT:    add a0, sp, a0
 ; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v16, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v8, v16, 2, v0.t
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v16, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a2, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsll.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v8, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a6, v0.t
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a3, v0.t
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 48
+; RV64-NEXT:    mul a0, a0, a1
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
@@ -2395,160 +2674,173 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_v32i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT:    vmv8r.v v24, v16
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    bltu a0, a2, .LBB35_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a1, 16
 ; RV32-NEXT:  .LBB35_2:
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v24, v8, 1
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 2
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 8
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 16
+; RV32-NEXT:    vor.vv v8, v8, v24
 ; RV32-NEXT:    li a2, 32
-; RV32-NEXT:    vsrl.vx v16, v8, a2
-; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vx v24, v8, a2
+; RV32-NEXT:    vor.vv v8, v8, v24
 ; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    addi a3, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 32
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a3, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
+; RV32-NEXT:    vlse64.v v24, (a3), zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v16, v0, v0
+; RV32-NEXT:    vxor.vv v16, v0, v16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v16
 ; RV32-NEXT:    vand.vv v16, v8, v0
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
 ; RV32-NEXT:    vand.vv v8, v8, v0
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v16, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    addi a3, sp, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a1
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a3, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a3
 ; RV32-NEXT:    addi a0, a0, -1
 ; RV32-NEXT:    and a0, a0, a3
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v8, v24, 1
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vx v24, v8, a2
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 1
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a2, 24
-; RV32-NEXT:    mul a0, a0, a2
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vsub.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v24, v8, v0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vor.vv v16, v8, v16
+; RV32-NEXT:    vsrl.vi v0, v16, 2
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vsrl.vi v0, v16, 4
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vsrl.vi v0, v16, 8
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vsrl.vi v0, v16, 16
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vsrl.vx v0, v16, a2
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vnot.v v8, v16
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v16, v0, v0
+; RV32-NEXT:    vxor.vv v16, v0, v16
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vadd.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    vsrl.vx v16, v8, a1
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_v32i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB35_2
-; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    mv a2, a0
+; RV64-NEXT:    bltu a0, a1, .LBB35_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a2, 16
 ; RV64-NEXT:  .LBB35_2:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v24, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    vsrl.vi v24, v8, 2
@@ -2559,69 +2851,117 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    vsrl.vi v24, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsrl.vx v24, v8, a1
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsrl.vx v24, v8, a3
 ; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    lui a2, 349525
-; RV64-NEXT:    addiw a2, a2, 1365
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vsub.vv v8, v8, v24
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v24, v8, a3
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a4, a1, 32
+; RV64-NEXT:    add a1, a1, a4
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v0, v16, 2
+; RV64-NEXT:    vmv8r.v v24, v16
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v16, v0, v0
+; RV64-NEXT:    vxor.vv v16, v0, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v16, v8, v0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vadd.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v24
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6
-; RV64-NEXT:    addi a7, a0, -16
-; RV64-NEXT:    sltu a0, a0, a7
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v16, v24, 3
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    li a2, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a2
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a4, a0, -16
+; RV64-NEXT:    sltu a0, a0, a4
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a7
+; RV64-NEXT:    and a0, a0, a4
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 2
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 8
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 16
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vx v24, v16, a1
-; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vor.vv v16, v8, v16
+; RV64-NEXT:    vsrl.vi v0, v16, 2
+; RV64-NEXT:    vor.vv v16, v16, v0
+; RV64-NEXT:    vsrl.vi v0, v16, 4
+; RV64-NEXT:    vor.vv v16, v16, v0
+; RV64-NEXT:    vsrl.vi v0, v16, 8
+; RV64-NEXT:    vor.vv v16, v16, v0
+; RV64-NEXT:    vsrl.vi v0, v16, 16
+; RV64-NEXT:    vor.vv v16, v16, v0
+; RV64-NEXT:    vsrl.vx v0, v16, a3
+; RV64-NEXT:    vor.vv v16, v16, v0
 ; RV64-NEXT:    vnot.v v16, v16
+; RV64-NEXT:    vsll.vi v0, v24, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v8, v0, v24
 ; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vsub.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v16, a3
-; RV64-NEXT:    vsrl.vi v16, v16, 2
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vadd.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vmul.vx v16, v16, a5
-; RV64-NEXT:    vsrl.vx v16, v16, a6
+; RV64-NEXT:    vand.vv v8, v24, v8
+; RV64-NEXT:    vsub.vv v8, v16, v8
+; RV64-NEXT:    vand.vv v16, v8, v0
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vx v16, v8, a2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
@@ -3512,35 +3852,26 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
+; RV32-NEXT:    vsll.vi v11, v10, 2, v0.t
+; RV32-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV32-NEXT:    vsll.vi v12, v11, 1, v0.t
+; RV32-NEXT:    vxor.vv v12, v11, v12, v0.t
+; RV32-NEXT:    vand.vv v9, v9, v12, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
+; RV32-NEXT:    vand.vv v9, v8, v11, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v11, v0.t
+; RV32-NEXT:    vadd.vv v8, v9, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    vsrl.vi v9, v10, 3, v0.t
+; RV32-NEXT:    vand.vv v9, v10, v9, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -3559,37 +3890,34 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    vsrl.vi v9, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v9, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v9, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vsll.vi v11, v10, 2, v0.t
+; RV64-NEXT:    vxor.vx v11, v11, a1, v0.t
+; RV64-NEXT:    vsll.vi v12, v11, 1, v0.t
+; RV64-NEXT:    vxor.vv v12, v11, v12, v0.t
+; RV64-NEXT:    vand.vv v9, v9, v12, v0.t
 ; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v9, v8, v11, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v11, v0.t
 ; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v9, v10, 3, v0.t
+; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -3616,35 +3944,26 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vor.vv v8, v8, v9
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
+; RV32-NEXT:    vsll.vi v11, v10, 2
+; RV32-NEXT:    vxor.vv v11, v10, v11
+; RV32-NEXT:    vadd.vv v12, v11, v11
+; RV32-NEXT:    vxor.vv v12, v11, v12
+; RV32-NEXT:    vand.vv v9, v9, v12
 ; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
+; RV32-NEXT:    vand.vv v9, v8, v11
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
+; RV32-NEXT:    vand.vv v8, v8, v11
+; RV32-NEXT:    vadd.vv v8, v9, v8
 ; RV32-NEXT:    vsrl.vi v9, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vsrl.vi v9, v10, 3
+; RV32-NEXT:    vand.vv v9, v10, v9
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -3663,37 +3982,34 @@ define <2 x i64> @vp_ctlz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    vsrl.vi v9, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v9
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v9, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v9, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v9
 ; RV64-NEXT:    vnot.v v8, v8
 ; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT:    vsll.vi v11, v10, 2
+; RV64-NEXT:    vxor.vx v11, v11, a1
+; RV64-NEXT:    vadd.vv v12, v11, v11
+; RV64-NEXT:    vxor.vv v12, v11, v12
+; RV64-NEXT:    vand.vv v9, v9, v12
 ; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0
+; RV64-NEXT:    vand.vv v9, v8, v11
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v11
 ; RV64-NEXT:    vadd.vv v8, v9, v8
 ; RV64-NEXT:    vsrl.vi v9, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v9, v10, 3
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -3722,35 +4038,26 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
 ; RV32-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
+; RV32-NEXT:    vsll.vi v14, v12, 2, v0.t
+; RV32-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV32-NEXT:    vsll.vi v16, v14, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v14, v16, v0.t
+; RV32-NEXT:    vand.vv v10, v10, v16, v0.t
 ; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
+; RV32-NEXT:    vand.vv v10, v8, v14, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v14, v0.t
+; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v10, v12, 3, v0.t
+; RV32-NEXT:    vand.vv v10, v12, v10, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -3769,37 +4076,34 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vsrl.vi v10, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v10, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v10, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vsll.vi v14, v12, 2, v0.t
+; RV64-NEXT:    vxor.vx v14, v14, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v14, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v14, v16, v0.t
+; RV64-NEXT:    vand.vv v10, v10, v16, v0.t
 ; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v10, v8, v14, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v14, v0.t
 ; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v10, v12, 3, v0.t
+; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -3826,35 +4130,26 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vor.vv v8, v8, v10
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
+; RV32-NEXT:    vsll.vi v14, v12, 2
+; RV32-NEXT:    vxor.vv v14, v12, v14
+; RV32-NEXT:    vadd.vv v16, v14, v14
+; RV32-NEXT:    vxor.vv v16, v14, v16
+; RV32-NEXT:    vand.vv v10, v10, v16
 ; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
+; RV32-NEXT:    vand.vv v10, v8, v14
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
+; RV32-NEXT:    vand.vv v8, v8, v14
+; RV32-NEXT:    vadd.vv v8, v10, v8
 ; RV32-NEXT:    vsrl.vi v10, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v10, v12, 3
+; RV32-NEXT:    vand.vv v10, v12, v10
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -3873,37 +4168,34 @@ define <4 x i64> @vp_ctlz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vsrl.vi v10, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v10
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v10, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v10, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v10
 ; RV64-NEXT:    vnot.v v8, v8
 ; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV64-NEXT:    vsll.vi v14, v12, 2
+; RV64-NEXT:    vxor.vx v14, v14, a1
+; RV64-NEXT:    vadd.vv v16, v14, v14
+; RV64-NEXT:    vxor.vv v16, v14, v16
+; RV64-NEXT:    vand.vv v10, v10, v16
 ; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vand.vv v10, v8, v14
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v14
 ; RV64-NEXT:    vadd.vv v8, v10, v8
 ; RV64-NEXT:    vsrl.vi v10, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v10, v12, 3
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v8, v8, v10
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -3930,38 +4222,29 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
 ; RV32-NEXT:    li a1, 32
 ; RV32-NEXT:    vsrl.vx v12, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vnot.v v12, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v12, 1, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
+; RV32-NEXT:    vmv.v.x v8, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vsll.vi v20, v8, 2, v0.t
+; RV32-NEXT:    vxor.vv v20, v8, v20, v0.t
+; RV32-NEXT:    vsll.vi v24, v20, 1, v0.t
+; RV32-NEXT:    vxor.vv v24, v20, v24, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    vsub.vv v12, v12, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v12, v20, v0.t
+; RV32-NEXT:    vsrl.vi v12, v12, 2, v0.t
+; RV32-NEXT:    vand.vv v12, v12, v20, v0.t
+; RV32-NEXT:    vadd.vv v12, v16, v12, v0.t
+; RV32-NEXT:    vsrl.vi v16, v12, 4, v0.t
+; RV32-NEXT:    vadd.vv v12, v12, v16, v0.t
+; RV32-NEXT:    vand.vv v12, v12, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vmul.vv v8, v12, v8, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV32-NEXT:    ret
@@ -3979,37 +4262,34 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    vsrl.vi v12, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v12, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v12, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vnot.v v12, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v12, 1, v0.t
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vsll.vi v20, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v20, v20, a1, v0.t
+; RV64-NEXT:    vsll.vi v24, v20, 1, v0.t
+; RV64-NEXT:    vxor.vv v24, v20, v24, v0.t
+; RV64-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV64-NEXT:    vsub.vv v12, v12, v16, v0.t
+; RV64-NEXT:    vand.vv v16, v12, v20, v0.t
+; RV64-NEXT:    vsrl.vi v12, v12, 2, v0.t
+; RV64-NEXT:    vand.vv v12, v12, v20, v0.t
+; RV64-NEXT:    vadd.vv v12, v16, v12, v0.t
+; RV64-NEXT:    vsrl.vi v16, v12, 4, v0.t
+; RV64-NEXT:    vadd.vv v12, v12, v16, v0.t
+; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vsrl.vi v8, v8, 3, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v12, v8, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -4036,35 +4316,26 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 ; RV32-NEXT:    vor.vv v8, v8, v12
 ; RV32-NEXT:    vnot.v v8, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
+; RV32-NEXT:    lui a1, 61681
+; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v16, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
+; RV32-NEXT:    vsll.vi v20, v16, 2
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    vadd.vv v24, v20, v20
+; RV32-NEXT:    vxor.vv v24, v20, v24
+; RV32-NEXT:    vand.vv v12, v12, v24
 ; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
+; RV32-NEXT:    vand.vv v12, v8, v20
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
+; RV32-NEXT:    vand.vv v8, v8, v20
+; RV32-NEXT:    vadd.vv v8, v12, v8
 ; RV32-NEXT:    vsrl.vi v12, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 61681
-; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v12, v16, 3
+; RV32-NEXT:    vand.vv v12, v16, v12
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -4083,37 +4354,34 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 ; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    vsrl.vi v12, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v12
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v12, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v12, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v12
 ; RV64-NEXT:    vnot.v v8, v8
 ; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV64-NEXT:    vsll.vi v20, v16, 2
+; RV64-NEXT:    vxor.vx v20, v20, a1
+; RV64-NEXT:    vadd.vv v24, v20, v20
+; RV64-NEXT:    vxor.vv v24, v20, v24
+; RV64-NEXT:    vand.vv v12, v12, v24
 ; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0
+; RV64-NEXT:    vand.vv v12, v8, v20
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v20
 ; RV64-NEXT:    vadd.vv v8, v12, v8
 ; RV64-NEXT:    vsrl.vi v12, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v12, v16, 3
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vmul.vv v8, v8, v12
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -4126,24 +4394,17 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v15i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
@@ -4159,40 +4420,79 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v24, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v24, v16, v0.t
+; RV32-NEXT:    vand.vv v24, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v24, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v15i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
@@ -4204,39 +4504,65 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v24, v16, v0.t
+; RV64-NEXT:    vand.vv v24, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vadd.vv v8, v24, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl)
   ret <15 x i64> %v
@@ -4245,24 +4571,16 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z
 define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v15i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v16
@@ -4278,40 +4596,57 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex
 ; RV32-NEXT:    vsrl.vx v16, v8, a1
 ; RV32-NEXT:    vor.vv v8, v8, v16
 ; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
 ; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v8, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    vsll.vi v0, v8, 2
+; RV32-NEXT:    vxor.vv v0, v8, v0
+; RV32-NEXT:    vadd.vv v24, v0, v0
+; RV32-NEXT:    vxor.vv v24, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    vsrl.vi v16, v16, 2
+; RV32-NEXT:    vand.vv v16, v16, v0
+; RV32-NEXT:    vadd.vv v16, v24, v16
+; RV32-NEXT:    vsrl.vi v24, v16, 4
+; RV32-NEXT:    vadd.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 3
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vmul.vv v8, v16, v8
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v15i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v16
@@ -4323,39 +4658,54 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex
 ; RV64-NEXT:    vor.vv v8, v8, v16
 ; RV64-NEXT:    vsrl.vi v16, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vnot.v v16, v8
+; RV64-NEXT:    vsrl.vi v8, v16, 1
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v0, v8, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v24, v8, v24
+; RV64-NEXT:    vsub.vv v16, v16, v24
+; RV64-NEXT:    vand.vv v24, v16, v0
+; RV64-NEXT:    vsrl.vi v16, v16, 2
+; RV64-NEXT:    vand.vv v16, v16, v0
+; RV64-NEXT:    vadd.vv v16, v24, v16
+; RV64-NEXT:    vsrl.vi v24, v16, 4
+; RV64-NEXT:    vadd.vv v16, v16, v24
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v8, 3
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vmul.vv v8, v16, v8
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
@@ -4366,24 +4716,17 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex
 define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
@@ -4399,40 +4742,79 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v24, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v24, v16, v0.t
+; RV32-NEXT:    vand.vv v24, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v24, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v24, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v16i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
@@ -4444,39 +4826,65 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0, v0.t
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v24, v16, v0.t
+; RV64-NEXT:    vand.vv v24, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vadd.vv v8, v24, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl)
   ret <16 x i64> %v
@@ -4485,24 +4893,16 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z
 define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v16i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
 ; RV32-NEXT:    vor.vv v8, v8, v16
@@ -4518,40 +4918,57 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex
 ; RV32-NEXT:    vsrl.vx v16, v8, a1
 ; RV32-NEXT:    vor.vv v8, v8, v16
 ; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
 ; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    vlse64.v v8, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16
+; RV32-NEXT:    vsll.vi v0, v8, 2
+; RV32-NEXT:    vxor.vv v0, v8, v0
+; RV32-NEXT:    vadd.vv v24, v0, v0
+; RV32-NEXT:    vxor.vv v24, v0, v24
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v24, v16, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v24, v16, v0
+; RV32-NEXT:    vsrl.vi v16, v16, 2
+; RV32-NEXT:    vand.vv v16, v16, v0
+; RV32-NEXT:    vadd.vv v16, v24, v16
+; RV32-NEXT:    vsrl.vi v24, v16, 4
+; RV32-NEXT:    vadd.vv v16, v16, v24
+; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 3
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vmul.vv v8, v16, v8
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v16i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v16
@@ -4563,39 +4980,54 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex
 ; RV64-NEXT:    vor.vv v8, v8, v16
 ; RV64-NEXT:    vsrl.vi v16, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a0
+; RV64-NEXT:    li a1, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a1
 ; RV64-NEXT:    vor.vv v8, v8, v16
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
-; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v16, v8
-; RV64-NEXT:    vsrl.vi v16, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vnot.v v16, v8
+; RV64-NEXT:    vsrl.vi v8, v16, 1
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v0, v8, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v24, v8, v24
+; RV64-NEXT:    vsub.vv v16, v16, v24
+; RV64-NEXT:    vand.vv v24, v16, v0
+; RV64-NEXT:    vsrl.vi v16, v16, 2
+; RV64-NEXT:    vand.vv v16, v16, v0
+; RV64-NEXT:    vadd.vv v16, v24, v16
+; RV64-NEXT:    vsrl.vi v24, v16, 4
+; RV64-NEXT:    vadd.vv v16, v16, v24
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v8, 3
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vmul.vv v8, v16, v8
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
@@ -4606,37 +5038,26 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex
 define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v32i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 48
+; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT:    vslidedown.vi v24, v0, 2
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    mv a2, a0
 ; RV32-NEXT:    bltu a0, a3, .LBB70_2
 ; RV32-NEXT:  # %bb.1:
@@ -4658,113 +5079,113 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 5
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 4
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 40
+; RV32-NEXT:    addi a3, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vlse64.v v16, (a3), zero
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    li a4, 40
 ; RV32-NEXT:    mul a3, a3, a4
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vxor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 3
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a2, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 4
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a3, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a3
@@ -4773,9 +5194,10 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a0, a0, a3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
 ; RV32-NEXT:    vor.vv v8, v16, v8, v0.t
@@ -4790,81 +5212,119 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vxor.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
-; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    li a1, 48
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v32i64:
@@ -4872,23 +5332,25 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    li a1, 16
 ; RV64-NEXT:    vslidedown.vi v24, v0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB70_2
+; RV64-NEXT:    mv a3, a0
+; RV64-NEXT:    bltu a0, a1, .LBB70_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    li a3, 16
 ; RV64-NEXT:  .LBB70_2:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
@@ -4899,81 +5361,238 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
+; RV64-NEXT:    li a2, 32
+; RV64-NEXT:    vsrl.vx v16, v8, a2, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a2, 349525
-; RV64-NEXT:    addiw a2, a2, 1365
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a4, a1, 32
+; RV64-NEXT:    add a1, a1, a4
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    li a5, 40
+; RV64-NEXT:    mul a4, a4, a5
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a3, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vxor.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6, v0.t
-; RV64-NEXT:    addi a7, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
-; RV64-NEXT:    addi a7, a0, -16
-; RV64-NEXT:    sltu a0, a0, a7
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a4, a0, -16
+; RV64-NEXT:    sltu a0, a0, a4
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a7
+; RV64-NEXT:    and a0, a0, a4
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    csrr a0, vlenb
-; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a0, a0, a4
 ; RV64-NEXT:    add a0, sp, a0
 ; RV64-NEXT:    addi a0, a0, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vor.vv v16, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v8, v16, 2, v0.t
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v16, 1, v0.t
 ; RV64-NEXT:    vor.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 8, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 16, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a2, v0.t
 ; RV64-NEXT:    vor.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsll.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v8, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a6, v0.t
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a3, v0.t
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 48
+; RV64-NEXT:    mul a0, a0, a1
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
@@ -4984,160 +5603,173 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctlz_zero_undef_v32i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT:    vmv8r.v v24, v16
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    bltu a0, a2, .LBB71_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a1, 16
 ; RV32-NEXT:  .LBB71_2:
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v24, v8, 1
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 2
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 8
+; RV32-NEXT:    vor.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 16
+; RV32-NEXT:    vor.vv v8, v8, v24
 ; RV32-NEXT:    li a2, 32
-; RV32-NEXT:    vsrl.vx v16, v8, a2
-; RV32-NEXT:    vor.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vx v24, v8, a2
+; RV32-NEXT:    vor.vv v8, v8, v24
 ; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    addi a3, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
 ; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
+; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 32
+; RV32-NEXT:    addi a3, a3, 16
+; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    addi a3, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
+; RV32-NEXT:    vlse64.v v24, (a3), zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v16, v0, v0
+; RV32-NEXT:    vxor.vv v16, v0, v16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v16
 ; RV32-NEXT:    vand.vv v16, v8, v0
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
 ; RV32-NEXT:    vand.vv v8, v8, v0
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v16, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    addi a3, sp, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a1
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a3, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a3
 ; RV32-NEXT:    addi a0, a0, -1
 ; RV32-NEXT:    and a0, a0, a3
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v8, v24, 1
-; RV32-NEXT:    vor.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 2
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 8
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vi v24, v8, 16
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vsrl.vx v24, v8, a2
-; RV32-NEXT:    vor.vv v8, v8, v24
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 1
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a2, 24
-; RV32-NEXT:    mul a0, a0, a2
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vsub.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v24, v8, v0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vor.vv v16, v8, v16
+; RV32-NEXT:    vsrl.vi v0, v16, 2
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vsrl.vi v0, v16, 4
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vsrl.vi v0, v16, 8
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vsrl.vi v0, v16, 16
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vsrl.vx v0, v16, a2
+; RV32-NEXT:    vor.vv v16, v16, v0
+; RV32-NEXT:    vnot.v v8, v16
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v16, v0, v0
+; RV32-NEXT:    vxor.vv v16, v0, v16
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vadd.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    vsrl.vx v16, v8, a1
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctlz_zero_undef_v32i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB71_2
-; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    mv a2, a0
+; RV64-NEXT:    bltu a0, a1, .LBB71_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a2, 16
 ; RV64-NEXT:  .LBB71_2:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
 ; RV64-NEXT:    vsrl.vi v24, v8, 1
 ; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    vsrl.vi v24, v8, 2
@@ -5148,69 +5780,117 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex
 ; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    vsrl.vi v24, v8, 16
 ; RV64-NEXT:    vor.vv v8, v8, v24
-; RV64-NEXT:    li a1, 32
-; RV64-NEXT:    vsrl.vx v24, v8, a1
+; RV64-NEXT:    li a3, 32
+; RV64-NEXT:    vsrl.vx v24, v8, a3
 ; RV64-NEXT:    vor.vv v8, v8, v24
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    lui a2, 349525
-; RV64-NEXT:    addiw a2, a2, 1365
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vsub.vv v8, v8, v24
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v24, v8, a3
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a4, a1, 32
+; RV64-NEXT:    add a1, a1, a4
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v0, v16, 2
+; RV64-NEXT:    vmv8r.v v24, v16
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v16, v0, v0
+; RV64-NEXT:    vxor.vv v16, v0, v16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v16, v8, v0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vadd.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v24
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6
-; RV64-NEXT:    addi a7, a0, -16
-; RV64-NEXT:    sltu a0, a0, a7
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v16, v24, 3
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    li a2, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a2
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a4, a0, -16
+; RV64-NEXT:    sltu a0, a0, a4
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a7
+; RV64-NEXT:    and a0, a0, a4
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 2
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 8
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 16
-; RV64-NEXT:    vor.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vx v24, v16, a1
-; RV64-NEXT:    vor.vv v16, v16, v24
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vor.vv v16, v8, v16
+; RV64-NEXT:    vsrl.vi v0, v16, 2
+; RV64-NEXT:    vor.vv v16, v16, v0
+; RV64-NEXT:    vsrl.vi v0, v16, 4
+; RV64-NEXT:    vor.vv v16, v16, v0
+; RV64-NEXT:    vsrl.vi v0, v16, 8
+; RV64-NEXT:    vor.vv v16, v16, v0
+; RV64-NEXT:    vsrl.vi v0, v16, 16
+; RV64-NEXT:    vor.vv v16, v16, v0
+; RV64-NEXT:    vsrl.vx v0, v16, a3
+; RV64-NEXT:    vor.vv v16, v16, v0
 ; RV64-NEXT:    vnot.v v16, v16
+; RV64-NEXT:    vsll.vi v0, v24, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v8, v0, v24
 ; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vand.vx v24, v24, a2
-; RV64-NEXT:    vsub.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v16, a3
-; RV64-NEXT:    vsrl.vi v16, v16, 2
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vadd.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vmul.vx v16, v16, a5
-; RV64-NEXT:    vsrl.vx v16, v16, a6
+; RV64-NEXT:    vand.vv v8, v24, v8
+; RV64-NEXT:    vsub.vv v8, v16, v8
+; RV64-NEXT:    vand.vv v16, v8, v0
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vx v16, v8, a2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
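
The updated checks in these test diffs build the ctpop masks from the single 0x0F0F0F0F splat instead of materializing each constant, via the vsll/vxor, vadd/vxor and vsrl/vand sequences above. A minimal illustrative C++ sketch of the identities those sequences rely on (not part of the patch; the mask names are mine):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Only this mask is materialized; the others are derived from it.
    uint64_t m0F = 0x0F0F0F0F0F0F0F0FULL;
    uint64_t m33 = m0F ^ (m0F << 2);   // vsll.vi 2 + vxor  -> 0x3333...
    uint64_t m55 = m33 ^ (m33 + m33);  // vadd (x+x == x<<1) + vxor -> 0x5555...
    uint64_t m01 = m0F & (m0F >> 3);   // vsrl.vi 3 + vand  -> 0x0101...
    assert(m33 == 0x3333333333333333ULL);
    assert(m55 == 0x5555555555555555ULL);
    assert(m01 == 0x0101010101010101ULL);
    return 0;
  }
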
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 277146cc1403e9..5e6844bef8d62e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -262,35 +262,26 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
 ; RV32I-NEXT:    vor.vv v8, v8, v9
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a1, 349525
-; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
 ; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32I-NEXT:    vmv.v.x v10, a1
 ; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v9, v9, v10
+; RV32I-NEXT:    vsll.vi v11, v10, 2
+; RV32I-NEXT:    vxor.vv v11, v10, v11
+; RV32I-NEXT:    vadd.vv v12, v11, v11
+; RV32I-NEXT:    vxor.vv v12, v11, v12
+; RV32I-NEXT:    vand.vv v9, v9, v12
 ; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi a1, a1, 819
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v10, v8, v9
+; RV32I-NEXT:    vand.vv v9, v8, v11
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vadd.vv v8, v10, v8
+; RV32I-NEXT:    vand.vv v8, v8, v11
+; RV32I-NEXT:    vadd.vv v8, v9, v8
 ; RV32I-NEXT:    vsrl.vi v9, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 61681
-; RV32I-NEXT:    addi a1, a1, -241
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi a1, a1, 257
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v9, v10, 3
+; RV32I-NEXT:    vand.vv v9, v10, v9
 ; RV32I-NEXT:    vmul.vv v8, v8, v9
 ; RV32I-NEXT:    li a1, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a1
@@ -671,35 +662,26 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
 ; RV32I-NEXT:    vor.vv v8, v8, v10
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a1, 349525
-; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
 ; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32I-NEXT:    vmv.v.x v12, a1
 ; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v10, v10, v12
+; RV32I-NEXT:    vsll.vi v14, v12, 2
+; RV32I-NEXT:    vxor.vv v14, v12, v14
+; RV32I-NEXT:    vadd.vv v16, v14, v14
+; RV32I-NEXT:    vxor.vv v16, v14, v16
+; RV32I-NEXT:    vand.vv v10, v10, v16
 ; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi a1, a1, 819
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v12, v8, v10
+; RV32I-NEXT:    vand.vv v10, v8, v14
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vadd.vv v8, v12, v8
+; RV32I-NEXT:    vand.vv v8, v8, v14
+; RV32I-NEXT:    vadd.vv v8, v10, v8
 ; RV32I-NEXT:    vsrl.vi v10, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 61681
-; RV32I-NEXT:    addi a1, a1, -241
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi a1, a1, 257
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vsrl.vi v10, v12, 3
+; RV32I-NEXT:    vand.vv v10, v12, v10
 ; RV32I-NEXT:    vmul.vv v8, v8, v10
 ; RV32I-NEXT:    li a1, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a1
@@ -1061,35 +1043,26 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; RV32I-NEXT:    vor.vv v8, v8, v9
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a1, 349525
-; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
 ; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32I-NEXT:    vmv.v.x v10, a1
 ; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v9, v9, v10
+; RV32I-NEXT:    vsll.vi v11, v10, 2
+; RV32I-NEXT:    vxor.vv v11, v10, v11
+; RV32I-NEXT:    vadd.vv v12, v11, v11
+; RV32I-NEXT:    vxor.vv v12, v11, v12
+; RV32I-NEXT:    vand.vv v9, v9, v12
 ; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi a1, a1, 819
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v10, v8, v9
+; RV32I-NEXT:    vand.vv v9, v8, v11
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vadd.vv v8, v10, v8
+; RV32I-NEXT:    vand.vv v8, v8, v11
+; RV32I-NEXT:    vadd.vv v8, v9, v8
 ; RV32I-NEXT:    vsrl.vi v9, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 61681
-; RV32I-NEXT:    addi a1, a1, -241
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi a1, a1, 257
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vsrl.vi v9, v10, 3
+; RV32I-NEXT:    vand.vv v9, v10, v9
 ; RV32I-NEXT:    vmul.vv v8, v8, v9
 ; RV32I-NEXT:    li a1, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a1
@@ -1446,35 +1419,26 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; RV32I-NEXT:    vor.vv v8, v8, v10
 ; RV32I-NEXT:    vnot.v v8, v8
 ; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a1, 349525
-; RV32I-NEXT:    addi a1, a1, 1365
+; RV32I-NEXT:    lui a1, 61681
+; RV32I-NEXT:    addi a1, a1, -241
 ; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32I-NEXT:    vmv.v.x v12, a1
 ; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v10, v10, v12
+; RV32I-NEXT:    vsll.vi v14, v12, 2
+; RV32I-NEXT:    vxor.vv v14, v12, v14
+; RV32I-NEXT:    vadd.vv v16, v14, v14
+; RV32I-NEXT:    vxor.vv v16, v14, v16
+; RV32I-NEXT:    vand.vv v10, v10, v16
 ; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi a1, a1, 819
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v12, v8, v10
+; RV32I-NEXT:    vand.vv v10, v8, v14
 ; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vadd.vv v8, v12, v8
+; RV32I-NEXT:    vand.vv v8, v8, v14
+; RV32I-NEXT:    vadd.vv v8, v10, v8
 ; RV32I-NEXT:    vsrl.vi v10, v8, 4
 ; RV32I-NEXT:    vadd.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 61681
-; RV32I-NEXT:    addi a1, a1, -241
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi a1, a1, 257
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vsrl.vi v10, v12, 3
+; RV32I-NEXT:    vand.vv v10, v12, v10
 ; RV32I-NEXT:    vmul.vv v8, v8, v10
 ; RV32I-NEXT:    li a1, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a1
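
For the scalar materialization that remains in the RV64 checks below (lui 61681 / addiw -241 / slli 32 / add), a small sketch checking the arithmetic, again only as an illustration with hypothetical names:

  #include <cassert>
  #include <cstdint>

  int main() {
    // lui 61681 sets bits [31:12]; addiw -241 fills in the low 12 bits.
    uint32_t lo = (61681u << 12) - 241u;               // 0x0F0F0F0F
    assert(lo == 0x0F0F0F0Fu);
    // slli 32 + add splats the 32-bit pattern across all 64 bits.
    uint64_t splat = (uint64_t)lo + ((uint64_t)lo << 32);
    assert(splat == 0x0F0F0F0F0F0F0F0FULL);
    return 0;
  }
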
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
index c4b22955f84c4f..e1a0ebdf49a434 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll
@@ -681,37 +681,27 @@ declare <2 x i64> @llvm.vp.ctpop.v2i64(<2 x i64>, <2 x i1>, i32)
 define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV32-NEXT:    vxor.vv v10, v9, v10, v0.t
+; RV32-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV32-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV32-NEXT:    vand.vv v11, v8, v10, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3, v0.t
+; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -719,34 +709,30 @@ define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_ctpop_v2i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV64-NEXT:    vxor.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV64-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV64-NEXT:    vand.vv v11, v8, v10, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v9, v9, 3, v0.t
+; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -757,37 +743,27 @@ define <2 x i64> @vp_ctpop_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2
+; RV32-NEXT:    vxor.vv v10, v9, v10
+; RV32-NEXT:    vadd.vv v11, v10, v10
+; RV32-NEXT:    vxor.vv v11, v10, v11
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vv v11, v12, v11
+; RV32-NEXT:    vsub.vv v8, v8, v11
+; RV32-NEXT:    vand.vv v11, v8, v10
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vadd.vv v8, v11, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3
+; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -795,34 +771,30 @@ define <2 x i64> @vp_ctpop_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_ctpop_v2i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0
+; RV64-NEXT:    vsll.vi v10, v9, 2
+; RV64-NEXT:    vxor.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v11, v10, v10
+; RV64-NEXT:    vxor.vv v11, v10, v11
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vv v11, v12, v11
+; RV64-NEXT:    vsub.vv v8, v8, v11
+; RV64-NEXT:    vand.vv v11, v8, v10
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v10
+; RV64-NEXT:    vadd.vv v8, v11, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v10
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v9, v9, 3
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -837,37 +809,27 @@ declare <4 x i64> @llvm.vp.ctpop.v4i64(<4 x i64>, <4 x i1>, i32)
 define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV32-NEXT:    vxor.vv v12, v10, v12, v0.t
+; RV32-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV32-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV32-NEXT:    vand.vv v14, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3, v0.t
+; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -875,34 +837,30 @@ define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_ctpop_v4i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV64-NEXT:    vxor.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV64-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV64-NEXT:    vand.vv v14, v8, v12, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v10, v10, 3, v0.t
+; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -913,37 +871,27 @@ define <4 x i64> @vp_ctpop_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2
+; RV32-NEXT:    vxor.vv v12, v10, v12
+; RV32-NEXT:    vadd.vv v14, v12, v12
+; RV32-NEXT:    vxor.vv v14, v12, v14
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vand.vv v14, v16, v14
+; RV32-NEXT:    vsub.vv v8, v8, v14
+; RV32-NEXT:    vand.vv v14, v8, v12
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vadd.vv v8, v14, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3
+; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -951,34 +899,30 @@ define <4 x i64> @vp_ctpop_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_ctpop_v4i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vsll.vi v12, v10, 2
+; RV64-NEXT:    vxor.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v14, v12, v12
+; RV64-NEXT:    vxor.vv v14, v12, v14
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vv v14, v16, v14
+; RV64-NEXT:    vsub.vv v8, v8, v14
+; RV64-NEXT:    vand.vv v14, v8, v12
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v12
+; RV64-NEXT:    vadd.vv v8, v14, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v12
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v10, v10, 3
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v8, v8, v10
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -993,37 +937,27 @@ declare <8 x i64> @llvm.vp.ctpop.v8i64(<8 x i64>, <8 x i1>, i32)
 define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV32-NEXT:    vxor.vv v16, v12, v16, v0.t
+; RV32-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV32-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV32-NEXT:    vand.vv v20, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3, v0.t
+; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -1031,34 +965,30 @@ define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_ctpop_v8i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV64-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV64-NEXT:    vand.vv v20, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v12, v12, 3, v0.t
+; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1069,37 +999,27 @@ define <8 x i64> @vp_ctpop_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2
+; RV32-NEXT:    vxor.vv v16, v12, v16
+; RV32-NEXT:    vadd.vv v20, v16, v16
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    vsrl.vi v24, v8, 1
+; RV32-NEXT:    vand.vv v20, v24, v20
+; RV32-NEXT:    vsub.vv v8, v8, v20
+; RV32-NEXT:    vand.vv v20, v8, v16
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vadd.vv v8, v20, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3
+; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1107,34 +1027,30 @@ define <8 x i64> @vp_ctpop_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_ctpop_v8i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0
+; RV64-NEXT:    vsll.vi v16, v12, 2
+; RV64-NEXT:    vxor.vx v16, v16, a1
+; RV64-NEXT:    vadd.vv v20, v16, v16
+; RV64-NEXT:    vxor.vv v20, v16, v20
+; RV64-NEXT:    vsrl.vi v24, v8, 1
+; RV64-NEXT:    vand.vv v20, v24, v20
+; RV64-NEXT:    vsub.vv v8, v8, v20
+; RV64-NEXT:    vand.vv v20, v8, v16
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vadd.vv v8, v20, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v12, v12, 3
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vmul.vv v8, v8, v12
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1149,89 +1065,149 @@ declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32)
 define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v15i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v16, v24, v0.t
+; RV32-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v24, v16, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v15i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v24, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 1, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v24, v24, v16, v0.t
+; RV64-NEXT:    vand.vv v16, v24, v8, v0.t
+; RV64-NEXT:    vsrl.vi v24, v24, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v24, v8, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64> %va, <15 x i1> %m, i32 %evl)
   ret <15 x i64> %v
@@ -1240,89 +1216,101 @@ define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev
 define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v15i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v16
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v15i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
@@ -1335,89 +1323,149 @@ declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32)
 define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v16, v24, v0.t
+; RV32-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v24, v16, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v16i64:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v24, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 1, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v24, v24, v16, v0.t
+; RV64-NEXT:    vand.vv v16, v24, v8, v0.t
+; RV64-NEXT:    vsrl.vi v24, v24, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v24, v8, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64> %va, <16 x i1> %m, i32 %evl)
   ret <16 x i64> %v
@@ -1426,89 +1474,101 @@ define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev
 define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v16i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v16
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v16i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
@@ -1521,117 +1581,154 @@ declare <32 x i64> @llvm.vp.ctpop.v32i64(<32 x i64>, <32 x i1>, i32)
 define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v32i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV32-NEXT:    csrr a1, vlenb
 ; RV32-NEXT:    li a2, 40
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 48
+; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT:    vslidedown.vi v7, v0, 2
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    bltu a0, a2, .LBB34_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a1, 16
 ; RV32-NEXT:  .LBB34_2:
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a2, sp, 40
+; RV32-NEXT:    addi a2, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vlse64.v v8, (a2), zero
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 5
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
-; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 24
-; RV32-NEXT:    mul a2, a2, a3
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
-; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v24, v8, v16, v0.t
-; RV32-NEXT:    addi a2, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a2), zero
+; RV32-NEXT:    vlse64.v v16, (a2), zero
 ; RV32-NEXT:    csrr a2, vlenb
 ; RV32-NEXT:    li a3, 24
 ; RV32-NEXT:    mul a2, a2, a3
 ; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
-; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v24, v8, v0.t
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 4
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
+; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v16, v24, 2, v0.t
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 4
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
-; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v16, 4, v0.t
-; RV32-NEXT:    vadd.vv v16, v16, v8, v0.t
-; RV32-NEXT:    addi a2, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a2), zero
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 4
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
-; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v8, v16, 1, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v16, 3, v0.t
 ; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a2), zero
-; RV32-NEXT:    addi a2, sp, 48
-; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v24, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vmul.vv v8, v8, v24, v0.t
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a1, v0.t
 ; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    slli a2, a2, 5
 ; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
+; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a2, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a2
@@ -1639,52 +1736,102 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
 ; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmv1r.v v0, v7
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v16, v16, v8, v0.t
+; RV32-NEXT:    vsll.vi v8, v16, 1, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a2, 40
 ; RV32-NEXT:    mul a0, a0, a2
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v24, v16, 1, v0.t
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsub.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vand.vv v24, v24, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    li a2, 40
 ; RV32-NEXT:    mul a0, a0, a2
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    mul a0, a0, a2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    mul a0, a0, a2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    mul a0, a0, a2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a0, a0, a2
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    mul a0, a0, a2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    mul a0, a0, a2
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vmul.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vx v16, v8, a1, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 48
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v32i64:
@@ -1692,81 +1839,263 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    slli a1, a1, 5
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
-; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    li a1, 16
 ; RV64-NEXT:    vslidedown.vi v24, v0, 2
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB34_2
+; RV64-NEXT:    mv a2, a0
+; RV64-NEXT:    bltu a0, a1, .LBB34_2
 ; RV64-NEXT:  # %bb.1:
-; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    li a2, 16
 ; RV64-NEXT:  .LBB34_2:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a1, 349525
-; RV64-NEXT:    addiw a1, a1, 1365
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a2, 209715
-; RV64-NEXT:    addiw a2, a2, 819
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v16, v8, a2, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a2, v0.t
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a3, a1, 32
+; RV64-NEXT:    add a1, a1, a3
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v16, 1, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a3, 61681
-; RV64-NEXT:    addiw a3, a3, -241
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    lui a4, 4112
-; RV64-NEXT:    addiw a4, a4, 257
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
-; RV64-NEXT:    li a5, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a5, v0.t
-; RV64-NEXT:    addi a6, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a6) # Unknown-size Folded Spill
-; RV64-NEXT:    addi a6, a0, -16
-; RV64-NEXT:    sltu a0, a0, a6
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 40
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV64-NEXT:    vand.vx v8, v16, a1, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vmul.vv v16, v16, v8, v0.t
+; RV64-NEXT:    li a2, 56
+; RV64-NEXT:    vsrl.vx v16, v16, a2, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a3, a0, -16
+; RV64-NEXT:    sltu a0, a0, a3
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a6
+; RV64-NEXT:    and a0, a0, a3
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a3, 40
+; RV64-NEXT:    mul a0, a0, a3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsll.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v8, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 3
 ; RV64-NEXT:    add a0, sp, a0
 ; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
 ; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
-; RV64-NEXT:    vsub.vv v16, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v16, a2, v0.t
-; RV64-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
-; RV64-NEXT:    vmul.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a5, v0.t
 ; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
 ; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a3, 40
+; RV64-NEXT:    mul a0, a0, a3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a2, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 48
+; RV64-NEXT:    mul a0, a0, a1
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
@@ -1777,190 +2106,226 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev
 define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_ctpop_v32i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 40
+; RV32-NEXT:    li a2, 24
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x28, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 40 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    slli a1, a1, 4
 ; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 48
+; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    bltu a0, a2, .LBB35_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a1, 16
 ; RV32-NEXT:  .LBB35_2:
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a2, sp, 40
+; RV32-NEXT:    addi a2, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v24, (a2), zero
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    li a3, 24
-; RV32-NEXT:    mul a2, a2, a3
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
-; RV32-NEXT:    vs8r.v v24, (a2) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a2, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a2), zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v16, v24, v0
+; RV32-NEXT:    vadd.vv v0, v16, v16
+; RV32-NEXT:    vxor.vv v8, v16, v0
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    addi a1, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v0, v8
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vl8r.v v0, (a1) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v0, v8
+; RV32-NEXT:    vand.vv v0, v8, v16
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v8, v16, v8
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vadd.vv v8, v0, v8
 ; RV32-NEXT:    vsrl.vi v16, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a2, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a2), zero
-; RV32-NEXT:    csrr a2, vlenb
-; RV32-NEXT:    slli a2, a2, 4
-; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
-; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v16
-; RV32-NEXT:    addi a2, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a2), zero
-; RV32-NEXT:    addi a2, sp, 48
-; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vmul.vv v16, v16, v8
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
+; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a1, 56
-; RV32-NEXT:    vsrl.vx v8, v16, a1
+; RV32-NEXT:    vsrl.vx v8, v8, a1
 ; RV32-NEXT:    csrr a2, vlenb
 ; RV32-NEXT:    slli a2, a2, 3
 ; RV32-NEXT:    add a2, sp, a2
-; RV32-NEXT:    addi a2, a2, 48
+; RV32-NEXT:    addi a2, a2, 16
 ; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a2, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a2
 ; RV32-NEXT:    addi a0, a0, -1
 ; RV32-NEXT:    and a0, a0, a2
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v16, v24, 2
+; RV32-NEXT:    vxor.vv v16, v24, v16
+; RV32-NEXT:    vadd.vv v0, v16, v16
+; RV32-NEXT:    vxor.vv v8, v16, v0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a2, 24
-; RV32-NEXT:    mul a0, a0, a2
-; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v16, v8, v16
-; RV32-NEXT:    vand.vv v8, v16, v0
-; RV32-NEXT:    vsrl.vi v16, v16, 2
-; RV32-NEXT:    vand.vv v16, v16, v0
-; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v0, v8
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v0, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v0, v8
+; RV32-NEXT:    vand.vv v0, v8, v16
+; RV32-NEXT:    vsrl.vi v8, v8, 2
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    vsrl.vx v16, v8, a1
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 40
+; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_ctpop_v32i64_unmasked:
 ; RV64:       # %bb.0:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV64-NEXT:    li a2, 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    mv a1, a0
 ; RV64-NEXT:    bltu a0, a2, .LBB35_2
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a1, 16
 ; RV64-NEXT:  .LBB35_2:
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    lui a1, 349525
-; RV64-NEXT:    addiw a1, a1, 1365
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    vand.vx v24, v24, a1
-; RV64-NEXT:    vsub.vv v8, v8, v24
-; RV64-NEXT:    lui a2, 209715
-; RV64-NEXT:    addiw a2, a2, 819
+; RV64-NEXT:    lui a2, 61681
+; RV64-NEXT:    addiw a2, a2, -241
 ; RV64-NEXT:    slli a3, a2, 32
 ; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v24, v8, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a2
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v0, v8, 2
+; RV64-NEXT:    vxor.vx v0, v0, a2
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v8, v0, v24
+; RV64-NEXT:    addi a1, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v24, v16, 1
+; RV64-NEXT:    vand.vv v8, v24, v8
+; RV64-NEXT:    vsub.vv v8, v16, v8
+; RV64-NEXT:    vand.vv v24, v8, v0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    vand.vv v8, v8, v0
 ; RV64-NEXT:    vadd.vv v8, v24, v8
 ; RV64-NEXT:    vsrl.vi v24, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v24
-; RV64-NEXT:    lui a3, 61681
-; RV64-NEXT:    addiw a3, a3, -241
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    lui a4, 4112
-; RV64-NEXT:    addiw a4, a4, 257
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vmul.vx v8, v8, a4
-; RV64-NEXT:    li a5, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a5
-; RV64-NEXT:    addi a6, a0, -16
-; RV64-NEXT:    sltu a0, a0, a6
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vl8r.v v16, (a1) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v24, v16, 3
+; RV64-NEXT:    vand.vx v24, v24, a2
+; RV64-NEXT:    vmul.vv v8, v8, v24
+; RV64-NEXT:    li a1, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a1
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a3, a0, -16
+; RV64-NEXT:    sltu a0, a0, a3
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a6
+; RV64-NEXT:    and a0, a0, a3
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vand.vx v24, v24, a1
-; RV64-NEXT:    vsub.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v16, a2
-; RV64-NEXT:    vsrl.vi v16, v16, 2
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a2
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v8, v24, v0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v0, v16, 1
+; RV64-NEXT:    vand.vv v8, v0, v8
+; RV64-NEXT:    vsub.vv v8, v16, v8
+; RV64-NEXT:    vand.vv v0, v8, v24
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vv v8, v8, v24
+; RV64-NEXT:    vadd.vv v8, v0, v8
+; RV64-NEXT:    vsrl.vi v24, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v24
+; RV64-NEXT:    vand.vx v8, v8, a2
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
 ; RV64-NEXT:    vand.vx v16, v16, a2
-; RV64-NEXT:    vadd.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vmul.vx v16, v16, a4
-; RV64-NEXT:    vsrl.vx v16, v16, a5
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vx v16, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index b5114bbe491896..909d347dfa0691 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -129,36 +129,27 @@ define void @ctpop_v2i64(ptr %x, ptr %y) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    vand.vv v9, v10, v9
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2
+; RV32-NEXT:    vxor.vv v10, v9, v10
+; RV32-NEXT:    vadd.vv v11, v10, v10
+; RV32-NEXT:    vxor.vv v11, v10, v11
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vv v11, v12, v11
+; RV32-NEXT:    vsub.vv v8, v8, v11
+; RV32-NEXT:    vand.vv v11, v8, v10
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vadd.vv v8, v11, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3
+; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a1
@@ -435,36 +426,27 @@ define void @ctpop_v4i64(ptr %x, ptr %y) {
 ; RV32:       # %bb.0:
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32-NEXT:    vle64.v v8, (a0)
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    vand.vv v10, v12, v10
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2
+; RV32-NEXT:    vxor.vv v12, v10, v12
+; RV32-NEXT:    vadd.vv v14, v12, v12
+; RV32-NEXT:    vxor.vv v14, v12, v14
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vand.vv v14, v16, v14
+; RV32-NEXT:    vsub.vv v8, v8, v14
+; RV32-NEXT:    vand.vv v14, v8, v12
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vadd.vv v8, v14, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3
+; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a1, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
index 49f6ffd691292a..afa8c2047bcddd 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll
@@ -777,41 +777,31 @@ declare <2 x i64> @llvm.vp.cttz.v2i64(<2 x i64>, i1 immarg, <2 x i1>, i32)
 define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV32-NEXT:    vxor.vv v10, v9, v10, v0.t
+; RV32-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV32-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v12, v8, a0, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV32-NEXT:    vand.vv v11, v8, v10, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3, v0.t
+; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -819,38 +809,34 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_cttz_v2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV64-NEXT:    vxor.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV64-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v12, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV64-NEXT:    vand.vv v11, v8, v10, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v9, v9, 3, v0.t
+; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -861,41 +847,31 @@ define <2 x i64> @vp_cttz_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2
+; RV32-NEXT:    vxor.vv v10, v9, v10
+; RV32-NEXT:    vadd.vv v11, v10, v10
+; RV32-NEXT:    vxor.vv v11, v10, v11
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v12, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vv v11, v12, v11
+; RV32-NEXT:    vsub.vv v8, v8, v11
+; RV32-NEXT:    vand.vv v11, v8, v10
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vadd.vv v8, v11, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3
+; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -903,38 +879,34 @@ define <2 x i64> @vp_cttz_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_cttz_v2i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
+; RV64-NEXT:    vsll.vi v10, v9, 2
+; RV64-NEXT:    vxor.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v11, v10, v10
+; RV64-NEXT:    vxor.vv v11, v10, v11
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v12, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vv v11, v12, v11
+; RV64-NEXT:    vsub.vv v8, v8, v11
+; RV64-NEXT:    vand.vv v11, v8, v10
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v10
+; RV64-NEXT:    vadd.vv v8, v11, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v10
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v9, v9, 3
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -949,41 +921,31 @@ declare <4 x i64> @llvm.vp.cttz.v4i64(<4 x i64>, i1 immarg, <4 x i1>, i32)
 define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV32-NEXT:    vxor.vv v12, v10, v12, v0.t
+; RV32-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV32-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV32-NEXT:    vand.vv v14, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3, v0.t
+; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -991,38 +953,34 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_cttz_v4i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV64-NEXT:    vxor.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV64-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV64-NEXT:    vand.vv v14, v8, v12, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v10, v10, 3, v0.t
+; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1033,41 +991,31 @@ define <4 x i64> @vp_cttz_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2
+; RV32-NEXT:    vxor.vv v12, v10, v12
+; RV32-NEXT:    vadd.vv v14, v12, v12
+; RV32-NEXT:    vxor.vv v14, v12, v14
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vand.vv v14, v16, v14
+; RV32-NEXT:    vsub.vv v8, v8, v14
+; RV32-NEXT:    vand.vv v14, v8, v12
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vadd.vv v8, v14, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3
+; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1075,38 +1023,34 @@ define <4 x i64> @vp_cttz_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_cttz_v4i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
+; RV64-NEXT:    vsll.vi v12, v10, 2
+; RV64-NEXT:    vxor.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v14, v12, v12
+; RV64-NEXT:    vxor.vv v14, v12, v14
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v16, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vv v14, v16, v14
+; RV64-NEXT:    vsub.vv v8, v8, v14
+; RV64-NEXT:    vand.vv v14, v8, v12
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v12
+; RV64-NEXT:    vadd.vv v8, v14, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v12
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v10, v10, 3
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v8, v8, v10
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1121,41 +1065,31 @@ declare <8 x i64> @llvm.vp.cttz.v8i64(<8 x i64>, i1 immarg, <8 x i1>, i32)
 define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV32-NEXT:    vxor.vv v16, v12, v16, v0.t
+; RV32-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV32-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v24, v8, a0, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV32-NEXT:    vand.vv v20, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3, v0.t
+; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -1163,38 +1097,34 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_cttz_v8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v24, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV64-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV64-NEXT:    vand.vv v20, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v12, v12, 3, v0.t
+; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -1205,41 +1135,31 @@ define <8 x i64> @vp_cttz_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2
+; RV32-NEXT:    vxor.vv v16, v12, v16
+; RV32-NEXT:    vadd.vv v20, v16, v16
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v24, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 1
+; RV32-NEXT:    vand.vv v20, v24, v20
+; RV32-NEXT:    vsub.vv v8, v8, v20
+; RV32-NEXT:    vand.vv v20, v8, v16
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vadd.vv v8, v20, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3
+; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -1247,38 +1167,34 @@ define <8 x i64> @vp_cttz_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ;
 ; RV64-LABEL: vp_cttz_v8i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1
+; RV64-NEXT:    vsll.vi v16, v12, 2
+; RV64-NEXT:    vxor.vx v16, v16, a1
+; RV64-NEXT:    vadd.vv v20, v16, v16
+; RV64-NEXT:    vxor.vv v20, v16, v20
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v24, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
+; RV64-NEXT:    vsrl.vi v24, v8, 1
+; RV64-NEXT:    vand.vv v20, v24, v20
+; RV64-NEXT:    vsub.vv v8, v8, v20
+; RV64-NEXT:    vand.vv v20, v8, v16
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vadd.vv v8, v20, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v12, v12, 3
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vmul.vv v8, v8, v12
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -1293,97 +1209,173 @@ declare <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32)
 define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v15i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV32-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v16, v24, v0.t
+; RV32-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v24, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v15i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
-; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV64-NEXT:    vsll.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v24, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v16, v8, a0, v0.t
+; RV64-NEXT:    vnot.v v8, v8, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v24, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 false, <15 x i1> %m, i32 %evl)
   ret <15 x i64> %v
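The regenerated checks above all follow the same shape: only the 0x0F0F... splat (lui 61681 / addi -241) is materialized, and the 0x33..., 0x55... and 0x01... masks are rebuilt from it with a shift plus vxor/vand, while vmul.vx becomes vmul.vv against the derived splat. A minimal standalone C++ sketch of those identities (assuming 64-bit elements; written for illustration, not taken from the patch itself):

  #include <cstdint>

  // Illustrative sketch only: restates the bit identities visible in the
  // vsll/vxor/vsrl/vand sequences of the updated check lines.
  constexpr uint64_t Mask0F = 0x0F0F0F0F0F0F0F0FULL; // the one splat that is materialized
  constexpr uint64_t Mask33 = Mask0F ^ (Mask0F << 2); // vsll.vi 2 + vxor
  constexpr uint64_t Mask55 = Mask33 ^ (Mask33 << 1); // vsll.vi 1 (or vadd x,x) + vxor
  constexpr uint64_t Mask01 = Mask0F & (Mask0F >> 3); // vsrl.vi 3 + vand

  static_assert(Mask33 == 0x3333333333333333ULL, "derived 0x33... splat");
  static_assert(Mask55 == 0x5555555555555555ULL, "derived 0x55... splat");
  static_assert(Mask01 == 0x0101010101010101ULL, "derived 0x01... splat");

  int main() { return 0; }
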
@@ -1392,97 +1384,109 @@ define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl
 define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v15i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v0, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v15i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v0, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
@@ -1495,97 +1499,173 @@ declare <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32)
 define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV32-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v16, v24, v0.t
+; RV32-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v24, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v16i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v24, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v24, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 false, <16 x i1> %m, i32 %evl)
   ret <16 x i64> %v
@@ -1594,97 +1674,109 @@ define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl
 define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v16i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v0, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v16i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v0, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
@@ -1697,155 +1789,142 @@ declare <32 x i64> @llvm.vp.cttz.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32)
 define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v32i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 48
+; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT:    vslidedown.vi v24, v0, 2
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    mv a2, a0
-; RV32-NEXT:    bltu a0, a3, .LBB34_2
-; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB34_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
 ; RV32-NEXT:  .LBB34_2:
+; RV32-NEXT:    addi a2, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a2), zero
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v8, v16, a1, v0.t
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vnot.v v8, v16, v0.t
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 3
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a2, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 4
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a3, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a3
@@ -1854,88 +1933,116 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a0, a0, a3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vx v8, v16, a1, v0.t
-; RV32-NEXT:    vnot.v v16, v16, v0.t
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a0, a0, a3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v8, v16, a1, v0.t
+; RV32-NEXT:    addi a0, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vnot.v v8, v16, v0.t
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
-; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    li a1, 48
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v32i64:
@@ -1943,14 +2050,21 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV64-NEXT:    li a1, 16
 ; RV64-NEXT:    vslidedown.vi v24, v0, 2
@@ -1959,72 +2073,231 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
 ; RV64-NEXT:  .LBB34_2:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a3, a1, 32
+; RV64-NEXT:    add a1, a1, a3
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    li a2, 1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v8, v16, a2, v0.t
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vnot.v v8, v16, v0.t
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a2, 349525
-; RV64-NEXT:    addiw a2, a2, 1365
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6, v0.t
-; RV64-NEXT:    addi a7, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
-; RV64-NEXT:    addi a7, a0, -16
-; RV64-NEXT:    sltu a0, a0, a7
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV64-NEXT:    vand.vx v8, v16, a1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vmul.vv v16, v16, v8, v0.t
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsrl.vx v16, v16, a3, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a4, a0, -16
+; RV64-NEXT:    sltu a0, a0, a4
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a7
+; RV64-NEXT:    and a0, a0, a4
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsll.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v8, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 3
 ; RV64-NEXT:    add a0, sp, a0
 ; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
 ; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsub.vx v16, v8, a2, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vnot.v v16, v8, v0.t
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v16, 2, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a6, v0.t
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a3, v0.t
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 48
+; RV64-NEXT:    mul a0, a0, a1
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
@@ -2035,193 +2308,236 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl
 define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_v32i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT:    vmv8r.v v24, v16
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    bltu a0, a2, .LBB35_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a1, 16
 ; RV32-NEXT:  .LBB35_2:
-; RV32-NEXT:    li a2, 1
+; RV32-NEXT:    addi a2, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v24, (a2), zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a2
+; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    vsub.vx v0, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v8, v0, v0
+; RV32-NEXT:    vxor.vv v8, v0, v8
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v16, 1
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v16, v8
 ; RV32-NEXT:    vand.vv v16, v8, v0
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
 ; RV32-NEXT:    vand.vv v8, v8, v0
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v16, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    addi a3, sp, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    li a1, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a1
+; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a2
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a3, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a3
 ; RV32-NEXT:    addi a0, a0, -1
 ; RV32-NEXT:    and a0, a0, a3
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v8, v24, a2
-; RV32-NEXT:    vnot.v v24, v24
-; RV32-NEXT:    vand.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 1
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a2, 24
-; RV32-NEXT:    mul a0, a0, a2
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vsub.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v24, v8, v0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v16, v8, a1
+; RV32-NEXT:    vnot.v v0, v8
+; RV32-NEXT:    vand.vv v8, v0, v16
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v16, v0, v0
+; RV32-NEXT:    vxor.vv v16, v0, v16
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vadd.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vx v16, v8, a1
+; RV32-NEXT:    vsrl.vx v16, v8, a2
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_v32i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB35_2
-; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    mv a2, a0
+; RV64-NEXT:    bltu a0, a1, .LBB35_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a2, 16
 ; RV64-NEXT:  .LBB35_2:
-; RV64-NEXT:    li a2, 1
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v24, v8, a2
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    lui a1, 349525
-; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
 ; RV64-NEXT:    slli a3, a1, 32
 ; RV64-NEXT:    add a1, a1, a3
-; RV64-NEXT:    vand.vx v24, v24, a1
-; RV64-NEXT:    vsub.vv v8, v8, v24
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v24, v8, a3
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT:    li a2, 1
+; RV64-NEXT:    vsub.vx v0, v8, a2
+; RV64-NEXT:    vnot.v v8, v8
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vsll.vi v0, v16, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vv v16, v16, v24
+; RV64-NEXT:    vsub.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v16, v8, v0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vadd.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v24
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6
-; RV64-NEXT:    addi a7, a0, -16
-; RV64-NEXT:    sltu a0, a0, a7
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v0, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v0, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a3
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a4, a0, -16
+; RV64-NEXT:    sltu a0, a0, a4
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a7
+; RV64-NEXT:    and a0, a0, a4
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v24, v16, a2
-; RV64-NEXT:    vnot.v v16, v16
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vand.vx v24, v24, a1
-; RV64-NEXT:    vsub.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v16, a3
-; RV64-NEXT:    vsrl.vi v16, v16, 2
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vadd.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vmul.vx v16, v16, a5
-; RV64-NEXT:    vsrl.vx v16, v16, a6
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v16, v8, a2
+; RV64-NEXT:    vnot.v v24, v8
+; RV64-NEXT:    vand.vv v16, v24, v16
+; RV64-NEXT:    vsll.vi v24, v0, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v8, v24, v0
+; RV64-NEXT:    vsrl.vi v0, v16, 1
+; RV64-NEXT:    vand.vv v8, v0, v8
+; RV64-NEXT:    vsub.vv v8, v16, v8
+; RV64-NEXT:    vand.vv v16, v8, v24
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vv v8, v8, v24
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vx v16, v8, a3
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
@@ -2976,41 +3292,31 @@ define <16 x i32> @vp_cttz_zero_undef_v16i32_unmasked(<16 x i32> %va, i32 zeroex
 define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v2i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV32-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV32-NEXT:    vxor.vv v10, v9, v10, v0.t
+; RV32-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV32-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v12, v8, a0, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV32-NEXT:    vand.vv v11, v8, v10, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV32-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3, v0.t
+; RV32-NEXT:    vand.vv v9, v9, v10, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -3018,38 +3324,34 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v2i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v10, v9, 2, v0.t
+; RV64-NEXT:    vxor.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vsll.vi v11, v10, 1, v0.t
+; RV64-NEXT:    vxor.vv v11, v10, v11, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v12, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v9, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v11, v12, v11, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v11, v0.t
+; RV64-NEXT:    vand.vv v11, v8, v10, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v9, v8, v0.t
-; RV64-NEXT:    vsrl.vi v9, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v9, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vadd.vv v8, v11, v8, v0.t
+; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v9, v9, 3, v0.t
+; RV64-NEXT:    vand.vx v9, v9, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v9, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -3060,41 +3362,31 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64(<2 x i64> %va, <2 x i1> %m, i32 zeroe
 define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vsub.vx v9, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vsrl.vi v9, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v9, v9, v10
-; RV32-NEXT:    vsub.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV32-NEXT:    vand.vv v10, v8, v9
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    vadd.vv v8, v10, v8
-; RV32-NEXT:    vsrl.vi v9, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v9
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vmv.v.x v9, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsll.vi v10, v9, 2
+; RV32-NEXT:    vxor.vv v10, v9, v10
+; RV32-NEXT:    vadd.vv v11, v10, v10
+; RV32-NEXT:    vxor.vv v11, v10, v11
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v12, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vsrl.vi v12, v8, 1
+; RV32-NEXT:    vand.vv v11, v12, v11
+; RV32-NEXT:    vsub.vv v8, v8, v11
+; RV32-NEXT:    vand.vv v11, v8, v10
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v10
+; RV32-NEXT:    vadd.vv v8, v11, v8
+; RV32-NEXT:    vsrl.vi v10, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    vand.vv v8, v8, v9
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32-NEXT:    vmv.v.x v9, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT:    vsrl.vi v10, v9, 3
+; RV32-NEXT:    vand.vv v9, v9, v10
 ; RV32-NEXT:    vmul.vv v8, v8, v9
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -3102,38 +3394,34 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v2i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV64-NEXT:    vmv.v.x v9, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m1, ta, ma
-; RV64-NEXT:    vsub.vx v9, v8, a1
+; RV64-NEXT:    vsll.vi v10, v9, 2
+; RV64-NEXT:    vxor.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v11, v10, v10
+; RV64-NEXT:    vxor.vv v11, v10, v11
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v12, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v9
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v9, a0
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v9, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v12
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vv v11, v12, v11
+; RV64-NEXT:    vsub.vv v8, v8, v11
+; RV64-NEXT:    vand.vv v11, v8, v10
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v10
+; RV64-NEXT:    vadd.vv v8, v11, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v10
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v9, v9, 3
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -3146,41 +3434,31 @@ define <2 x i64> @vp_cttz_zero_undef_v2i64_unmasked(<2 x i64> %va, i32 zeroext %
 define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v4i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV32-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV32-NEXT:    vxor.vv v12, v10, v12, v0.t
+; RV32-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV32-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV32-NEXT:    vand.vv v14, v8, v12, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV32-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3, v0.t
+; RV32-NEXT:    vand.vv v10, v10, v12, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -3188,38 +3466,34 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v4i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v12, v10, 2, v0.t
+; RV64-NEXT:    vxor.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vsll.vi v14, v12, 1, v0.t
+; RV64-NEXT:    vxor.vv v14, v12, v14, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v10, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v14, v16, v14, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v14, v0.t
+; RV64-NEXT:    vand.vv v14, v8, v12, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v10, v8, v0.t
-; RV64-NEXT:    vsrl.vi v10, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v10, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vadd.vv v8, v14, v8, v0.t
+; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v10, v10, 3, v0.t
+; RV64-NEXT:    vand.vx v10, v10, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v10, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -3230,41 +3504,31 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64(<4 x i64> %va, <4 x i1> %m, i32 zeroe
 define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vsub.vx v10, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vsrl.vi v10, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v10, v10, v12
-; RV32-NEXT:    vsub.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV32-NEXT:    vand.vv v12, v8, v10
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    vadd.vv v8, v12, v8
-; RV32-NEXT:    vsrl.vi v10, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v10
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32-NEXT:    vmv.v.x v10, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsll.vi v12, v10, 2
+; RV32-NEXT:    vxor.vv v12, v10, v12
+; RV32-NEXT:    vadd.vv v14, v12, v12
+; RV32-NEXT:    vxor.vv v14, v12, v14
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    vand.vv v14, v16, v14
+; RV32-NEXT:    vsub.vv v8, v8, v14
+; RV32-NEXT:    vand.vv v14, v8, v12
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v12
+; RV32-NEXT:    vadd.vv v8, v14, v8
+; RV32-NEXT:    vsrl.vi v12, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    vand.vv v8, v8, v10
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT:    vmv.v.x v10, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
+; RV32-NEXT:    vsrl.vi v12, v10, 3
+; RV32-NEXT:    vand.vv v10, v10, v12
 ; RV32-NEXT:    vmul.vv v8, v8, v10
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -3272,38 +3536,34 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v4i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV64-NEXT:    vmv.v.x v10, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m2, ta, ma
-; RV64-NEXT:    vsub.vx v10, v8, a1
+; RV64-NEXT:    vsll.vi v12, v10, 2
+; RV64-NEXT:    vxor.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v14, v12, v12
+; RV64-NEXT:    vxor.vv v14, v12, v14
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v16, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v10
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v10, a0
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v10, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vv v14, v16, v14
+; RV64-NEXT:    vsub.vv v8, v8, v14
+; RV64-NEXT:    vand.vv v14, v8, v12
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v12
+; RV64-NEXT:    vadd.vv v8, v14, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v12
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v10, v10, 3
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v8, v8, v10
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -3316,41 +3576,31 @@ define <4 x i64> @vp_cttz_zero_undef_v4i64_unmasked(<4 x i64> %va, i32 zeroext %
 define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v8i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
-; RV32-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12, v0.t
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
-; RV32-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV32-NEXT:    vxor.vv v16, v12, v16, v0.t
+; RV32-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV32-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v24, v8, a0, v0.t
+; RV32-NEXT:    vnot.v v8, v8, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV32-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV32-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV32-NEXT:    vand.vv v20, v8, v16, v0.t
+; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3, v0.t
+; RV32-NEXT:    vand.vv v12, v12, v16, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
@@ -3358,38 +3608,34 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v8i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v12, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v20, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v20, v16, v20, v0.t
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v24, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v12, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV64-NEXT:    vsrl.vi v24, v8, 1, v0.t
+; RV64-NEXT:    vand.vv v20, v24, v20, v0.t
+; RV64-NEXT:    vsub.vv v8, v8, v20, v0.t
+; RV64-NEXT:    vand.vv v20, v8, v16, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    vadd.vv v8, v12, v8, v0.t
-; RV64-NEXT:    vsrl.vi v12, v8, 4, v0.t
-; RV64-NEXT:    vadd.vv v8, v8, v12, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vadd.vv v8, v20, v8, v0.t
+; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    vsrl.vi v12, v12, 3, v0.t
+; RV64-NEXT:    vand.vx v12, v12, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v12, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
 ; RV64-NEXT:    ret
@@ -3400,41 +3646,31 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64(<8 x i64> %va, <8 x i1> %m, i32 zeroe
 define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v8i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vsub.vx v12, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vsrl.vi v12, v8, 1
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v16, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v12, v12, v16
-; RV32-NEXT:    vsub.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV32-NEXT:    vand.vv v16, v8, v12
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    vadd.vv v8, v16, v8
-; RV32-NEXT:    vsrl.vi v12, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v12
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
 ; RV32-NEXT:    vmv.v.x v12, a1
 ; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsll.vi v16, v12, 2
+; RV32-NEXT:    vxor.vv v16, v12, v16
+; RV32-NEXT:    vadd.vv v20, v16, v16
+; RV32-NEXT:    vxor.vv v20, v16, v20
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v24, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v24, v8, 1
+; RV32-NEXT:    vand.vv v20, v24, v20
+; RV32-NEXT:    vsub.vv v8, v8, v20
+; RV32-NEXT:    vand.vv v20, v8, v16
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v16
+; RV32-NEXT:    vadd.vv v8, v20, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    vand.vv v8, v8, v12
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    vsetivli zero, 16, e32, m4, ta, ma
-; RV32-NEXT:    vmv.v.x v12, a1
-; RV32-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
+; RV32-NEXT:    vsrl.vi v16, v12, 3
+; RV32-NEXT:    vand.vv v12, v12, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v12
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
@@ -3442,38 +3678,34 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v8i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 8, e64, m4, ta, ma
+; RV64-NEXT:    vmv.v.x v12, a1
 ; RV64-NEXT:    vsetvli zero, a0, e64, m4, ta, ma
-; RV64-NEXT:    vsub.vx v12, v8, a1
+; RV64-NEXT:    vsll.vi v16, v12, 2
+; RV64-NEXT:    vxor.vx v16, v16, a1
+; RV64-NEXT:    vadd.vv v20, v16, v16
+; RV64-NEXT:    vxor.vv v20, v16, v20
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v24, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v12
-; RV64-NEXT:    vsrl.vi v12, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v12, a0
-; RV64-NEXT:    vsub.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v12, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
+; RV64-NEXT:    vsrl.vi v24, v8, 1
+; RV64-NEXT:    vand.vv v20, v24, v20
+; RV64-NEXT:    vsub.vv v8, v8, v20
+; RV64-NEXT:    vand.vv v20, v8, v16
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    vadd.vv v8, v12, v8
-; RV64-NEXT:    vsrl.vi v12, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v12
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vadd.vv v8, v20, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v12, v12, 3
+; RV64-NEXT:    vand.vx v12, v12, a1
+; RV64-NEXT:    vmul.vv v8, v8, v12
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
 ; RV64-NEXT:    ret
@@ -3486,97 +3718,173 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext %
 define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v15i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV32-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v16, v24, v0.t
+; RV32-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v24, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v15i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v24, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v24, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64> %va, i1 true, <15 x i1> %m, i32 %evl)
   ret <15 x i64> %v
@@ -3585,97 +3893,109 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z
 define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v15i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v0, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v15i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v0, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <15 x i1> poison, i1 true, i32 0
   %m = shufflevector <15 x i1> %head, <15 x i1> poison, <15 x i32> zeroinitializer
@@ -3686,97 +4006,173 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex
 define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v16i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    addi a1, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a1), zero
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV32-NEXT:    vsll.vi v24, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v24, v16, v24, v0.t
+; RV32-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v24, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV32-NEXT:    vnot.v v8, v8, v0.t
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vand.vv v8, v8, v24, v0.t
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v24, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v8, v8, v24, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vadd.vv v8, v24, v8, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    addi a1, sp, 8
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v16, v24, v0.t
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0, v0.t
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v16i64:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 5
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    li a3, 24
+; RV64-NEXT:    mul a2, a2, a3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v24, v16, a1, v0.t
+; RV64-NEXT:    vsll.vi v16, v24, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v24, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 4
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v16, v8, a0, v0.t
 ; RV64-NEXT:    vnot.v v8, v8, v0.t
 ; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v16, v8, v24, v0.t
 ; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vv v8, v8, v24, v0.t
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0, v0.t
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %v = call <16 x i64> @llvm.vp.cttz.v16i64(<16 x i64> %va, i1 true, <16 x i1> %m, i32 %evl)
   ret <16 x i64> %v
@@ -3785,97 +4181,109 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z
 define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v16i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -32
-; RV32-NEXT:    .cfi_def_cfa_offset 32
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    sub sp, sp, a1
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
 ; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    sw a1, 8(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 4(sp)
-; RV32-NEXT:    sw a1, 0(sp)
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1
-; RV32-NEXT:    vnot.v v8, v8
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a1, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v24, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v24
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a1, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v24, v8, v16
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v16, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v16
 ; RV32-NEXT:    addi a1, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
 ; RV32-NEXT:    vlse64.v v16, (a1), zero
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v24, v16, 2
+; RV32-NEXT:    vxor.vv v24, v16, v24
+; RV32-NEXT:    li a0, 1
+; RV32-NEXT:    vsub.vx v0, v8, a0
+; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vadd.vv v0, v24, v24
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v0, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v0, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v0, v0, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v0
+; RV32-NEXT:    vand.vv v0, v8, v24
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vadd.vv v8, v0, v8
+; RV32-NEXT:    vsrl.vi v24, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v24
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    mv a1, sp
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a1), zero
-; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
+; RV32-NEXT:    vsrl.vi v24, v16, 3
+; RV32-NEXT:    vand.vv v16, v16, v24
 ; RV32-NEXT:    vmul.vv v8, v8, v16
 ; RV32-NEXT:    li a0, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a0
-; RV32-NEXT:    addi sp, sp, 32
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add sp, sp, a0
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v16i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a2, a1, 32
+; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    addi a2, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1
+; RV64-NEXT:    vsll.vi v24, v16, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    li a0, 1
+; RV64-NEXT:    vsub.vx v0, v8, a0
 ; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v0, v24, v0
 ; RV64-NEXT:    vsrl.vi v16, v8, 1
-; RV64-NEXT:    lui a0, 349525
-; RV64-NEXT:    addiw a0, a0, 1365
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v16, a0
+; RV64-NEXT:    vand.vv v16, v16, v0
 ; RV64-NEXT:    vsub.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 209715
-; RV64-NEXT:    addiw a0, a0, 819
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v16, v8, a0
+; RV64-NEXT:    vand.vv v16, v8, v24
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a0
+; RV64-NEXT:    vand.vv v8, v8, v24
 ; RV64-NEXT:    vadd.vv v8, v16, v8
 ; RV64-NEXT:    vsrl.vi v16, v8, 4
 ; RV64-NEXT:    vadd.vv v8, v8, v16
-; RV64-NEXT:    lui a0, 61681
-; RV64-NEXT:    addiw a0, a0, -241
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vand.vx v8, v8, a0
-; RV64-NEXT:    lui a0, 4112
-; RV64-NEXT:    addiw a0, a0, 257
-; RV64-NEXT:    slli a1, a0, 32
-; RV64-NEXT:    add a0, a0, a1
-; RV64-NEXT:    vmul.vx v8, v8, a0
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
 ; RV64-NEXT:    li a0, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a0
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <16 x i1> poison, i1 true, i32 0
   %m = shufflevector <16 x i1> %head, <16 x i1> poison, <16 x i32> zeroinitializer
@@ -3886,155 +4294,142 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex
 define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v32i64:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    li a2, 48
 ; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    add a1, sp, a1
-; RV32-NEXT:    addi a1, a1, 48
+; RV32-NEXT:    addi a1, a1, 16
 ; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV32-NEXT:    vslidedown.vi v24, v0, 2
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
-; RV32-NEXT:    li a3, 16
-; RV32-NEXT:    sw a1, 16(sp)
-; RV32-NEXT:    mv a2, a0
-; RV32-NEXT:    bltu a0, a3, .LBB70_2
-; RV32-NEXT:  # %bb.1:
-; RV32-NEXT:    li a2, 16
-; RV32-NEXT:  .LBB70_2:
-; RV32-NEXT:    li a1, 1
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV32-NEXT:    vnot.v v8, v8, v0.t
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    addi a3, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 48
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vsrl.vi v16, v16, 2, v0.t
-; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
-; RV32-NEXT:    addi a3, sp, 24
+; RV32-NEXT:    sw a1, 12(sp)
+; RV32-NEXT:    li a2, 16
+; RV32-NEXT:    sw a1, 8(sp)
+; RV32-NEXT:    mv a1, a0
+; RV32-NEXT:    bltu a0, a2, .LBB70_2
+; RV32-NEXT:  # %bb.1:
+; RV32-NEXT:    li a1, 16
+; RV32-NEXT:  .LBB70_2:
+; RV32-NEXT:    addi a2, sp, 8
 ; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v8, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 40
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v16, (a2), zero
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 3
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v8, v16, a1, v0.t
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vnot.v v8, v16, v0.t
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 3
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 4
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
+; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a2, a2, a3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 5
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v8, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    li a2, 56
 ; RV32-NEXT:    vsrl.vx v8, v8, a2, v0.t
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 4
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a3, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a3
@@ -4043,88 +4438,116 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV32-NEXT:    vmv1r.v v0, v24
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a3, 40
+; RV32-NEXT:    mul a0, a0, a3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vsub.vx v8, v16, a1, v0.t
-; RV32-NEXT:    vnot.v v16, v16, v0.t
-; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV32-NEXT:    vsll.vi v8, v16, 2, v0.t
+; RV32-NEXT:    vxor.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV32-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a3, 24
+; RV32-NEXT:    mul a0, a0, a3
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v8, v16, a1, v0.t
+; RV32-NEXT:    addi a0, sp, 16
 ; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vnot.v v8, v16, v0.t
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 24
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v8, 1, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
-; RV32-NEXT:    addi a0, sp, 48
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vsub.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
-; RV32-NEXT:    vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT:    vsrl.vi v16, v8, 2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 48
-; RV32-NEXT:    mul a0, a0, a1
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV32-NEXT:    vsrl.vi v16, v8, 4, v0.t
-; RV32-NEXT:    vadd.vv v8, v8, v16, v0.t
+; RV32-NEXT:    vadd.vv v16, v8, v16, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    li a1, 40
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 3
+; RV32-NEXT:    slli a0, a0, 5
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV32-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    vmul.vv v8, v8, v16, v0.t
 ; RV32-NEXT:    vsrl.vx v16, v8, a2, v0.t
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a1, 56
+; RV32-NEXT:    li a1, 48
 ; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v32i64:
@@ -4132,14 +4555,21 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV64-NEXT:    addi sp, sp, -16
 ; RV64-NEXT:    .cfi_def_cfa_offset 16
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    li a2, 48
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    sub sp, sp, a1
-; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb
 ; RV64-NEXT:    csrr a1, vlenb
-; RV64-NEXT:    slli a1, a1, 3
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
 ; RV64-NEXT:    add a1, sp, a1
 ; RV64-NEXT:    addi a1, a1, 16
 ; RV64-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    slli a1, a1, 4
+; RV64-NEXT:    add a1, sp, a1
+; RV64-NEXT:    addi a1, a1, 16
+; RV64-NEXT:    vs8r.v v8, (a1) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; RV64-NEXT:    li a1, 16
 ; RV64-NEXT:    vslidedown.vi v24, v0, 2
@@ -4148,72 +4578,231 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 ; RV64-NEXT:  # %bb.1:
 ; RV64-NEXT:    li a2, 16
 ; RV64-NEXT:  .LBB70_2:
-; RV64-NEXT:    li a1, 1
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
+; RV64-NEXT:    slli a3, a1, 32
+; RV64-NEXT:    add a1, a1, a3
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v8, a1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
+; RV64-NEXT:    vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 5
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v16, v8, 1, v0.t
+; RV64-NEXT:    vxor.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    li a2, 1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v8, v16, a2, v0.t
+; RV64-NEXT:    addi a3, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vnot.v v8, v16, v0.t
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    lui a2, 349525
-; RV64-NEXT:    addiw a2, a2, 1365
-; RV64-NEXT:    slli a3, a2, 32
-; RV64-NEXT:    add a2, a2, a3
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v8, 1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 3
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
 ; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v16, v8, 2, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6, v0.t
-; RV64-NEXT:    addi a7, sp, 16
-; RV64-NEXT:    vs8r.v v8, (a7) # Unknown-size Folded Spill
-; RV64-NEXT:    addi a7, a0, -16
-; RV64-NEXT:    sltu a0, a0, a7
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a3, a3, a4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v8, 3, v0.t
+; RV64-NEXT:    vand.vx v8, v16, a1, v0.t
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 5
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v16, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v8, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vmul.vv v16, v16, v8, v0.t
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsrl.vx v16, v16, a3, v0.t
+; RV64-NEXT:    csrr a4, vlenb
+; RV64-NEXT:    slli a4, a4, 4
+; RV64-NEXT:    add a4, sp, a4
+; RV64-NEXT:    addi a4, a4, 16
+; RV64-NEXT:    vs8r.v v16, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a4, a0, -16
+; RV64-NEXT:    sltu a0, a0, a4
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a7
+; RV64-NEXT:    and a0, a0, a4
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
 ; RV64-NEXT:    vmv1r.v v0, v24
 ; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a4, 40
+; RV64-NEXT:    mul a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsll.vi v16, v8, 2, v0.t
+; RV64-NEXT:    vxor.vx v16, v16, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsll.vi v8, v16, 1, v0.t
+; RV64-NEXT:    vxor.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 3
 ; RV64-NEXT:    add a0, sp, a0
 ; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a4, 24
+; RV64-NEXT:    mul a0, a0, a4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
 ; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
-; RV64-NEXT:    vsub.vx v16, v8, a1, v0.t
-; RV64-NEXT:    vnot.v v8, v8, v0.t
-; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsub.vx v16, v8, a2, v0.t
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vnot.v v16, v8, v0.t
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
 ; RV64-NEXT:    vsrl.vi v16, v8, 1, v0.t
-; RV64-NEXT:    vand.vx v16, v16, a2, v0.t
-; RV64-NEXT:    vsub.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v16, v8, a3, v0.t
-; RV64-NEXT:    vsrl.vi v8, v8, 2, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a3, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v16, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vv v16, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v16, v8, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV64-NEXT:    vsrl.vi v8, v16, 2, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 5
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vv v8, v8, v16, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
 ; RV64-NEXT:    vadd.vv v8, v16, v8, v0.t
 ; RV64-NEXT:    vsrl.vi v16, v8, 4, v0.t
 ; RV64-NEXT:    vadd.vv v8, v8, v16, v0.t
-; RV64-NEXT:    vand.vx v8, v8, a4, v0.t
-; RV64-NEXT:    vmul.vx v8, v8, a5, v0.t
-; RV64-NEXT:    vsrl.vx v16, v8, a6, v0.t
-; RV64-NEXT:    addi a0, sp, 16
-; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vand.vx v8, v8, a1, v0.t
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a2, 40
+; RV64-NEXT:    mul a0, a0, a2
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3, v0.t
+; RV64-NEXT:    vand.vx v16, v16, a1, v0.t
+; RV64-NEXT:    vmul.vv v8, v8, v16, v0.t
+; RV64-NEXT:    vsrl.vx v16, v8, a3, v0.t
 ; RV64-NEXT:    csrr a0, vlenb
 ; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 48
+; RV64-NEXT:    mul a0, a0, a1
 ; RV64-NEXT:    add sp, sp, a0
 ; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
@@ -4224,193 +4813,236 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z
 define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) {
 ; RV32-LABEL: vp_cttz_zero_undef_v32i64_unmasked:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    addi sp, sp, -48
-; RV32-NEXT:    .cfi_def_cfa_offset 48
+; RV32-NEXT:    addi sp, sp, -16
+; RV32-NEXT:    .cfi_def_cfa_offset 16
 ; RV32-NEXT:    csrr a1, vlenb
-; RV32-NEXT:    slli a1, a1, 5
+; RV32-NEXT:    li a2, 24
+; RV32-NEXT:    mul a1, a1, a2
 ; RV32-NEXT:    sub sp, sp, a1
-; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 32 * vlenb
-; RV32-NEXT:    vmv8r.v v24, v16
-; RV32-NEXT:    lui a1, 349525
-; RV32-NEXT:    addi a1, a1, 1365
-; RV32-NEXT:    sw a1, 44(sp)
-; RV32-NEXT:    sw a1, 40(sp)
-; RV32-NEXT:    lui a1, 209715
-; RV32-NEXT:    addi a1, a1, 819
-; RV32-NEXT:    sw a1, 36(sp)
-; RV32-NEXT:    sw a1, 32(sp)
+; RV32-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
+; RV32-NEXT:    csrr a1, vlenb
+; RV32-NEXT:    slli a1, a1, 4
+; RV32-NEXT:    add a1, sp, a1
+; RV32-NEXT:    addi a1, a1, 16
+; RV32-NEXT:    vs8r.v v16, (a1) # Unknown-size Folded Spill
 ; RV32-NEXT:    lui a1, 61681
 ; RV32-NEXT:    addi a1, a1, -241
-; RV32-NEXT:    sw a1, 28(sp)
-; RV32-NEXT:    sw a1, 24(sp)
-; RV32-NEXT:    lui a1, 4112
-; RV32-NEXT:    addi a1, a1, 257
-; RV32-NEXT:    sw a1, 20(sp)
+; RV32-NEXT:    sw a1, 12(sp)
 ; RV32-NEXT:    li a2, 16
-; RV32-NEXT:    sw a1, 16(sp)
+; RV32-NEXT:    sw a1, 8(sp)
 ; RV32-NEXT:    mv a1, a0
 ; RV32-NEXT:    bltu a0, a2, .LBB71_2
 ; RV32-NEXT:  # %bb.1:
 ; RV32-NEXT:    li a1, 16
 ; RV32-NEXT:  .LBB71_2:
-; RV32-NEXT:    li a2, 1
+; RV32-NEXT:    addi a2, sp, 8
+; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV32-NEXT:    vlse64.v v24, (a2), zero
 ; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v16, v8, a2
+; RV32-NEXT:    li a1, 1
+; RV32-NEXT:    vsub.vx v0, v8, a1
 ; RV32-NEXT:    vnot.v v8, v8
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v8, v0, v0
+; RV32-NEXT:    vxor.vv v8, v0, v8
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vs8r.v v8, (a2) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v8, v16, 1
+; RV32-NEXT:    csrr a2, vlenb
+; RV32-NEXT:    slli a2, a2, 3
+; RV32-NEXT:    add a2, sp, a2
+; RV32-NEXT:    addi a2, a2, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
 ; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vi v16, v8, 1
-; RV32-NEXT:    addi a3, sp, 40
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    li a4, 24
-; RV32-NEXT:    mul a3, a3, a4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v0, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v16, v16, v0
-; RV32-NEXT:    vsub.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 32
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v0, (a3), zero
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    addi a2, sp, 16
+; RV32-NEXT:    vl8r.v v16, (a2) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v16, v8
 ; RV32-NEXT:    vand.vv v16, v8, v0
 ; RV32-NEXT:    vsrl.vi v8, v8, 2
 ; RV32-NEXT:    vand.vv v8, v8, v0
 ; RV32-NEXT:    vadd.vv v8, v16, v8
 ; RV32-NEXT:    vsrl.vi v16, v8, 4
 ; RV32-NEXT:    vadd.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 24
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    csrr a3, vlenb
-; RV32-NEXT:    slli a3, a3, 4
-; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a3, sp, 16
-; RV32-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
-; RV32-NEXT:    vlse64.v v16, (a3), zero
-; RV32-NEXT:    addi a3, sp, 48
-; RV32-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    li a1, 56
-; RV32-NEXT:    vsrl.vx v8, v8, a1
+; RV32-NEXT:    li a2, 56
+; RV32-NEXT:    vsrl.vx v8, v8, a2
 ; RV32-NEXT:    csrr a3, vlenb
 ; RV32-NEXT:    slli a3, a3, 3
 ; RV32-NEXT:    add a3, sp, a3
-; RV32-NEXT:    addi a3, a3, 48
+; RV32-NEXT:    addi a3, a3, 16
 ; RV32-NEXT:    vs8r.v v8, (a3) # Unknown-size Folded Spill
 ; RV32-NEXT:    addi a3, a0, -16
 ; RV32-NEXT:    sltu a0, a0, a3
 ; RV32-NEXT:    addi a0, a0, -1
 ; RV32-NEXT:    and a0, a0, a3
 ; RV32-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT:    vsub.vx v8, v24, a2
-; RV32-NEXT:    vnot.v v24, v24
-; RV32-NEXT:    vand.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 1
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    li a2, 24
-; RV32-NEXT:    mul a0, a0, a2
+; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v24, v24, v16
-; RV32-NEXT:    vsub.vv v8, v8, v24
-; RV32-NEXT:    vand.vv v24, v8, v0
-; RV32-NEXT:    vsrl.vi v8, v8, 2
-; RV32-NEXT:    vand.vv v8, v8, v0
-; RV32-NEXT:    vadd.vv v8, v24, v8
-; RV32-NEXT:    vsrl.vi v24, v8, 4
-; RV32-NEXT:    vadd.vv v8, v8, v24
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vx v16, v8, a1
+; RV32-NEXT:    vnot.v v0, v8
+; RV32-NEXT:    vand.vv v8, v0, v16
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vs8r.v v8, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsll.vi v0, v24, 2
+; RV32-NEXT:    vxor.vv v0, v24, v0
+; RV32-NEXT:    vadd.vv v16, v0, v0
+; RV32-NEXT:    vxor.vv v16, v0, v16
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 4
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
-; RV32-NEXT:    vand.vv v8, v8, v16
-; RV32-NEXT:    addi a0, sp, 48
-; RV32-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vs8r.v v16, (a0) # Unknown-size Folded Spill
+; RV32-NEXT:    vsrl.vi v16, v8, 1
+; RV32-NEXT:    csrr a0, vlenb
+; RV32-NEXT:    slli a0, a0, 4
+; RV32-NEXT:    add a0, sp, a0
+; RV32-NEXT:    addi a0, a0, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vand.vv v16, v16, v8
+; RV32-NEXT:    addi a0, sp, 16
+; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV32-NEXT:    vsub.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v16, v8, v0
+; RV32-NEXT:    vsrl.vi v8, v8, 2
+; RV32-NEXT:    vand.vv v8, v8, v0
+; RV32-NEXT:    vadd.vv v8, v16, v8
+; RV32-NEXT:    vsrl.vi v16, v8, 4
+; RV32-NEXT:    vadd.vv v8, v8, v16
+; RV32-NEXT:    vand.vv v8, v8, v24
+; RV32-NEXT:    vsrl.vi v16, v24, 3
+; RV32-NEXT:    vand.vv v16, v24, v16
 ; RV32-NEXT:    vmul.vv v8, v8, v16
-; RV32-NEXT:    vsrl.vx v16, v8, a1
+; RV32-NEXT:    vsrl.vx v16, v8, a2
 ; RV32-NEXT:    csrr a0, vlenb
 ; RV32-NEXT:    slli a0, a0, 3
 ; RV32-NEXT:    add a0, sp, a0
-; RV32-NEXT:    addi a0, a0, 48
+; RV32-NEXT:    addi a0, a0, 16
 ; RV32-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
 ; RV32-NEXT:    csrr a0, vlenb
-; RV32-NEXT:    slli a0, a0, 5
+; RV32-NEXT:    li a1, 24
+; RV32-NEXT:    mul a0, a0, a1
 ; RV32-NEXT:    add sp, sp, a0
-; RV32-NEXT:    addi sp, sp, 48
+; RV32-NEXT:    addi sp, sp, 16
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: vp_cttz_zero_undef_v32i64_unmasked:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    li a2, 16
-; RV64-NEXT:    mv a1, a0
-; RV64-NEXT:    bltu a0, a2, .LBB71_2
-; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    addi sp, sp, -16
+; RV64-NEXT:    .cfi_def_cfa_offset 16
+; RV64-NEXT:    csrr a1, vlenb
+; RV64-NEXT:    li a2, 24
+; RV64-NEXT:    mul a1, a1, a2
+; RV64-NEXT:    sub sp, sp, a1
+; RV64-NEXT:    .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
 ; RV64-NEXT:    li a1, 16
+; RV64-NEXT:    csrr a2, vlenb
+; RV64-NEXT:    slli a2, a2, 3
+; RV64-NEXT:    add a2, sp, a2
+; RV64-NEXT:    addi a2, a2, 16
+; RV64-NEXT:    vs8r.v v16, (a2) # Unknown-size Folded Spill
+; RV64-NEXT:    mv a2, a0
+; RV64-NEXT:    bltu a0, a1, .LBB71_2
+; RV64-NEXT:  # %bb.1:
+; RV64-NEXT:    li a2, 16
 ; RV64-NEXT:  .LBB71_2:
-; RV64-NEXT:    li a2, 1
-; RV64-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v24, v8, a2
-; RV64-NEXT:    vnot.v v8, v8
-; RV64-NEXT:    vand.vv v8, v8, v24
-; RV64-NEXT:    vsrl.vi v24, v8, 1
-; RV64-NEXT:    lui a1, 349525
-; RV64-NEXT:    addiw a1, a1, 1365
+; RV64-NEXT:    lui a1, 61681
+; RV64-NEXT:    addiw a1, a1, -241
 ; RV64-NEXT:    slli a3, a1, 32
 ; RV64-NEXT:    add a1, a1, a3
-; RV64-NEXT:    vand.vx v24, v24, a1
-; RV64-NEXT:    vsub.vv v8, v8, v24
-; RV64-NEXT:    lui a3, 209715
-; RV64-NEXT:    addiw a3, a3, 819
-; RV64-NEXT:    slli a4, a3, 32
-; RV64-NEXT:    add a3, a3, a4
-; RV64-NEXT:    vand.vx v24, v8, a3
+; RV64-NEXT:    vsetivli zero, 16, e64, m8, ta, ma
+; RV64-NEXT:    vmv.v.x v16, a1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vs8r.v v16, (a3) # Unknown-size Folded Spill
+; RV64-NEXT:    vsetvli zero, a2, e64, m8, ta, ma
+; RV64-NEXT:    li a2, 1
+; RV64-NEXT:    vsub.vx v0, v8, a2
+; RV64-NEXT:    vnot.v v8, v8
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vsll.vi v0, v16, 2
+; RV64-NEXT:    vxor.vx v0, v0, a1
+; RV64-NEXT:    vadd.vv v24, v0, v0
+; RV64-NEXT:    vxor.vv v24, v0, v24
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vv v16, v16, v24
+; RV64-NEXT:    vsub.vv v8, v8, v16
+; RV64-NEXT:    vand.vv v16, v8, v0
 ; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a3
-; RV64-NEXT:    vadd.vv v8, v24, v8
-; RV64-NEXT:    vsrl.vi v24, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v24
-; RV64-NEXT:    lui a4, 61681
-; RV64-NEXT:    addiw a4, a4, -241
-; RV64-NEXT:    slli a5, a4, 32
-; RV64-NEXT:    add a4, a4, a5
-; RV64-NEXT:    vand.vx v8, v8, a4
-; RV64-NEXT:    lui a5, 4112
-; RV64-NEXT:    addiw a5, a5, 257
-; RV64-NEXT:    slli a6, a5, 32
-; RV64-NEXT:    add a5, a5, a6
-; RV64-NEXT:    vmul.vx v8, v8, a5
-; RV64-NEXT:    li a6, 56
-; RV64-NEXT:    vsrl.vx v8, v8, a6
-; RV64-NEXT:    addi a7, a0, -16
-; RV64-NEXT:    sltu a0, a0, a7
+; RV64-NEXT:    vand.vv v8, v8, v0
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    csrr a3, vlenb
+; RV64-NEXT:    slli a3, a3, 4
+; RV64-NEXT:    add a3, sp, a3
+; RV64-NEXT:    addi a3, a3, 16
+; RV64-NEXT:    vl8r.v v0, (a3) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v0, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    li a3, 56
+; RV64-NEXT:    vsrl.vx v8, v8, a3
+; RV64-NEXT:    addi a4, sp, 16
+; RV64-NEXT:    vs8r.v v8, (a4) # Unknown-size Folded Spill
+; RV64-NEXT:    addi a4, a0, -16
+; RV64-NEXT:    sltu a0, a0, a4
 ; RV64-NEXT:    addi a0, a0, -1
-; RV64-NEXT:    and a0, a0, a7
+; RV64-NEXT:    and a0, a0, a4
 ; RV64-NEXT:    vsetvli zero, a0, e64, m8, ta, ma
-; RV64-NEXT:    vsub.vx v24, v16, a2
-; RV64-NEXT:    vnot.v v16, v16
-; RV64-NEXT:    vand.vv v16, v16, v24
-; RV64-NEXT:    vsrl.vi v24, v16, 1
-; RV64-NEXT:    vand.vx v24, v24, a1
-; RV64-NEXT:    vsub.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v24, v16, a3
-; RV64-NEXT:    vsrl.vi v16, v16, 2
-; RV64-NEXT:    vand.vx v16, v16, a3
-; RV64-NEXT:    vadd.vv v16, v24, v16
-; RV64-NEXT:    vsrl.vi v24, v16, 4
-; RV64-NEXT:    vadd.vv v16, v16, v24
-; RV64-NEXT:    vand.vx v16, v16, a4
-; RV64-NEXT:    vmul.vx v16, v16, a5
-; RV64-NEXT:    vsrl.vx v16, v16, a6
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 3
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsub.vx v16, v8, a2
+; RV64-NEXT:    vnot.v v24, v8
+; RV64-NEXT:    vand.vv v16, v24, v16
+; RV64-NEXT:    vsll.vi v24, v0, 2
+; RV64-NEXT:    vxor.vx v24, v24, a1
+; RV64-NEXT:    vadd.vv v0, v24, v24
+; RV64-NEXT:    vxor.vv v8, v24, v0
+; RV64-NEXT:    vsrl.vi v0, v16, 1
+; RV64-NEXT:    vand.vv v8, v0, v8
+; RV64-NEXT:    vsub.vv v8, v16, v8
+; RV64-NEXT:    vand.vv v16, v8, v24
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vv v8, v8, v24
+; RV64-NEXT:    vadd.vv v8, v16, v8
+; RV64-NEXT:    vsrl.vi v16, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v16
+; RV64-NEXT:    vand.vx v8, v8, a1
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    slli a0, a0, 4
+; RV64-NEXT:    add a0, sp, a0
+; RV64-NEXT:    addi a0, a0, 16
+; RV64-NEXT:    vl8r.v v16, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    vsrl.vi v16, v16, 3
+; RV64-NEXT:    vand.vx v16, v16, a1
+; RV64-NEXT:    vmul.vv v8, v8, v16
+; RV64-NEXT:    vsrl.vx v16, v8, a3
+; RV64-NEXT:    addi a0, sp, 16
+; RV64-NEXT:    vl8r.v v8, (a0) # Unknown-size Folded Reload
+; RV64-NEXT:    csrr a0, vlenb
+; RV64-NEXT:    li a1, 24
+; RV64-NEXT:    mul a0, a0, a1
+; RV64-NEXT:    add sp, sp, a0
+; RV64-NEXT:    addi sp, sp, 16
 ; RV64-NEXT:    ret
   %head = insertelement <32 x i1> poison, i1 true, i32 0
   %m = shufflevector <32 x i1> %head, <32 x i1> poison, <32 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 8c8da6d1e00313..23f09eb30cab22 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -250,40 +250,31 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32I-NEXT:    vle64.v v8, (a0)
-; RV32I-NEXT:    li a1, 1
-; RV32I-NEXT:    vsub.vx v9, v8, a1
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a1, 349525
-; RV32I-NEXT:    addi a1, a1, 1365
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v9, v9, v10
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi a1, a1, 819
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v10, v8, v9
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
 ; RV32I-NEXT:    lui a1, 61681
 ; RV32I-NEXT:    addi a1, a1, -241
 ; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32I-NEXT:    vmv.v.x v9, a1
 ; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vsll.vi v10, v9, 2
+; RV32I-NEXT:    vxor.vv v10, v9, v10
+; RV32I-NEXT:    vadd.vv v11, v10, v10
+; RV32I-NEXT:    vxor.vv v11, v10, v11
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    vsub.vx v12, v8, a1
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vsrl.vi v12, v8, 1
+; RV32I-NEXT:    vand.vv v11, v12, v11
+; RV32I-NEXT:    vsub.vv v8, v8, v11
+; RV32I-NEXT:    vand.vv v11, v8, v10
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vadd.vv v8, v11, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi a1, a1, 257
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vsrl.vi v10, v9, 3
+; RV32I-NEXT:    vand.vv v9, v9, v10
 ; RV32I-NEXT:    vmul.vv v8, v8, v9
 ; RV32I-NEXT:    li a1, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a1
@@ -651,40 +642,31 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32I-NEXT:    vle64.v v8, (a0)
-; RV32I-NEXT:    li a1, 1
-; RV32I-NEXT:    vsub.vx v10, v8, a1
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a1, 349525
-; RV32I-NEXT:    addi a1, a1, 1365
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v10, v10, v12
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi a1, a1, 819
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v12, v8, v10
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    lui a1, 61681
 ; RV32I-NEXT:    addi a1, a1, -241
 ; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32I-NEXT:    vmv.v.x v10, a1
 ; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vsll.vi v12, v10, 2
+; RV32I-NEXT:    vxor.vv v12, v10, v12
+; RV32I-NEXT:    vadd.vv v14, v12, v12
+; RV32I-NEXT:    vxor.vv v14, v12, v14
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    vsub.vx v16, v8, a1
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    vand.vv v14, v16, v14
+; RV32I-NEXT:    vsub.vv v8, v8, v14
+; RV32I-NEXT:    vand.vv v14, v8, v12
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vadd.vv v8, v14, v8
+; RV32I-NEXT:    vsrl.vi v12, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v12
 ; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi a1, a1, 257
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vsrl.vi v12, v10, 3
+; RV32I-NEXT:    vand.vv v10, v10, v12
 ; RV32I-NEXT:    vmul.vv v8, v8, v10
 ; RV32I-NEXT:    li a1, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a1
@@ -1029,40 +1011,31 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV32I-NEXT:    vle64.v v8, (a0)
-; RV32I-NEXT:    li a1, 1
-; RV32I-NEXT:    vsub.vx v9, v8, a1
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vsrl.vi v9, v8, 1
-; RV32I-NEXT:    lui a1, 349525
-; RV32I-NEXT:    addi a1, a1, 1365
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v9, v9, v10
-; RV32I-NEXT:    vsub.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi a1, a1, 819
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
-; RV32I-NEXT:    vand.vv v10, v8, v9
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    vadd.vv v8, v10, v8
-; RV32I-NEXT:    vsrl.vi v9, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v9
 ; RV32I-NEXT:    lui a1, 61681
 ; RV32I-NEXT:    addi a1, a1, -241
 ; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32I-NEXT:    vmv.v.x v9, a1
 ; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vsll.vi v10, v9, 2
+; RV32I-NEXT:    vxor.vv v10, v9, v10
+; RV32I-NEXT:    vadd.vv v11, v10, v10
+; RV32I-NEXT:    vxor.vv v11, v10, v11
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    vsub.vx v12, v8, a1
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vsrl.vi v12, v8, 1
+; RV32I-NEXT:    vand.vv v11, v12, v11
+; RV32I-NEXT:    vsub.vv v8, v8, v11
+; RV32I-NEXT:    vand.vv v11, v8, v10
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v10
+; RV32I-NEXT:    vadd.vv v8, v11, v8
+; RV32I-NEXT:    vsrl.vi v10, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    vand.vv v8, v8, v9
-; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi a1, a1, 257
-; RV32I-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
-; RV32I-NEXT:    vmv.v.x v9, a1
-; RV32I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
+; RV32I-NEXT:    vsrl.vi v10, v9, 3
+; RV32I-NEXT:    vand.vv v9, v9, v10
 ; RV32I-NEXT:    vmul.vv v8, v8, v9
 ; RV32I-NEXT:    li a1, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a1
@@ -1400,40 +1373,31 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV32I-NEXT:    vle64.v v8, (a0)
-; RV32I-NEXT:    li a1, 1
-; RV32I-NEXT:    vsub.vx v10, v8, a1
-; RV32I-NEXT:    vnot.v v8, v8
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vsrl.vi v10, v8, 1
-; RV32I-NEXT:    lui a1, 349525
-; RV32I-NEXT:    addi a1, a1, 1365
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v12, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v10, v10, v12
-; RV32I-NEXT:    vsub.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 209715
-; RV32I-NEXT:    addi a1, a1, 819
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
-; RV32I-NEXT:    vand.vv v12, v8, v10
-; RV32I-NEXT:    vsrl.vi v8, v8, 2
-; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    vadd.vv v8, v12, v8
-; RV32I-NEXT:    vsrl.vi v10, v8, 4
-; RV32I-NEXT:    vadd.vv v8, v8, v10
 ; RV32I-NEXT:    lui a1, 61681
 ; RV32I-NEXT:    addi a1, a1, -241
 ; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV32I-NEXT:    vmv.v.x v10, a1
 ; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vsll.vi v12, v10, 2
+; RV32I-NEXT:    vxor.vv v12, v10, v12
+; RV32I-NEXT:    vadd.vv v14, v12, v12
+; RV32I-NEXT:    vxor.vv v14, v12, v14
+; RV32I-NEXT:    li a1, 1
+; RV32I-NEXT:    vsub.vx v16, v8, a1
+; RV32I-NEXT:    vnot.v v8, v8
+; RV32I-NEXT:    vand.vv v8, v8, v16
+; RV32I-NEXT:    vsrl.vi v16, v8, 1
+; RV32I-NEXT:    vand.vv v14, v16, v14
+; RV32I-NEXT:    vsub.vv v8, v8, v14
+; RV32I-NEXT:    vand.vv v14, v8, v12
+; RV32I-NEXT:    vsrl.vi v8, v8, 2
+; RV32I-NEXT:    vand.vv v8, v8, v12
+; RV32I-NEXT:    vadd.vv v8, v14, v8
+; RV32I-NEXT:    vsrl.vi v12, v8, 4
+; RV32I-NEXT:    vadd.vv v8, v8, v12
 ; RV32I-NEXT:    vand.vv v8, v8, v10
-; RV32I-NEXT:    lui a1, 4112
-; RV32I-NEXT:    addi a1, a1, 257
-; RV32I-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; RV32I-NEXT:    vmv.v.x v10, a1
-; RV32I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
+; RV32I-NEXT:    vsrl.vi v12, v10, 3
+; RV32I-NEXT:    vand.vv v10, v10, v12
 ; RV32I-NEXT:    vmul.vv v8, v8, v10
 ; RV32I-NEXT:    li a1, 56
 ; RV32I-NEXT:    vsrl.vx v8, v8, a1

>From d6c7f4970125cfe25d7c4ba856330b175a129986 Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Wed, 27 Mar 2024 13:39:18 +0800
Subject: [PATCH 2/2] Make constants opaque
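
The shift amounts used to rebuild the masks are now emitted as opaque
constants, so DAG combines should not fold the shift/xor expressions back
into plain splat immediates. A minimal sketch of the pattern, assuming the
values already in scope in expandCTPOP (DAG, dl, VT, Mask0F); the exact
calls are in the diff below:

  // 0x33333333... rebuilt as (0x0F0F0F0F... ^ (0x0F0F0F0F... << 2)).
  // The opaque shift amount keeps constant folding from collapsing this
  // back into a single constant node.
  SDValue Two =
      DAG.getConstant(2, dl, VT, /*isTarget=*/false, /*isOpaque=*/true);
  SDValue Mask33 =
      DAG.getNode(ISD::XOR, dl, VT, Mask0F,
                  DAG.getNode(ISD::SHL, dl, VT, Mask0F, Two));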

---
 .../CodeGen/SelectionDAG/TargetLowering.cpp   |    60 +-
 llvm/test/CodeGen/PowerPC/optcmp.ll           |    68 +-
 llvm/test/CodeGen/PowerPC/popcnt-zext.ll      |    31 +-
 .../PowerPC/vector-popcnt-128-ult-ugt.ll      | 13188 ++++++++--------
 .../CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll   |   124 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll  |    78 +-
 .../CodeGen/RISCV/rvv/fixed-vectors-cttz.ll   |   188 +-
 7 files changed, 6603 insertions(+), 7134 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 566a76a09d783f..5e11bf3afbf1e2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -8681,17 +8681,21 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   const APInt &Constant33 = APInt::getSplat(Len, APInt(8, 0x33));
   SDValue Mask33 =
       TTI.getIntImmCost(Constant33, VTTy, TargetTransformInfo::TCK_Latency) > 2
-          ? DAG.getNode(ISD::XOR, dl, VT, Mask0F,
-                        DAG.getNode(ISD::SHL, dl, VT, Mask0F,
-                                    DAG.getShiftAmountConstant(2, VT, dl)))
+          ? DAG.getNode(
+                ISD::XOR, dl, VT, Mask0F,
+                DAG.getNode(ISD::SHL, dl, VT, Mask0F,
+                            DAG.getConstant(2, dl, VT, /*isTarget=*/false,
+                                            /*isOpaque=*/true)))
           : DAG.getConstant(Constant33, dl, VT);
   // 0x55555555... = (0x33333333... ^ (0x33333333... << 1))
   const APInt &Constant55 = APInt::getSplat(Len, APInt(8, 0x55));
   SDValue Mask55 =
       TTI.getIntImmCost(Constant55, VTTy, TargetTransformInfo::TCK_Latency) > 2
-          ? DAG.getNode(ISD::XOR, dl, VT, Mask33,
-                        DAG.getNode(ISD::SHL, dl, VT, Mask33,
-                                    DAG.getShiftAmountConstant(1, VT, dl)))
+          ? DAG.getNode(
+                ISD::XOR, dl, VT, Mask33,
+                DAG.getNode(ISD::SHL, dl, VT, Mask33,
+                            DAG.getConstant(1, dl, VT, /*isTarget=*/false,
+                                            /*isOpaque=*/true)))
           : DAG.getConstant(Constant55, dl, VT);
 
   // v = v - ((v >> 1) & 0x55555555...)
@@ -8733,9 +8737,11 @@ SDValue TargetLowering::expandCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   const APInt &Constant01 = APInt::getSplat(Len, APInt(8, 0x01));
   SDValue Mask01 =
       TTI.getIntImmCost(Constant01, VTTy, TargetTransformInfo::TCK_Latency) > 2
-          ? DAG.getNode(ISD::AND, dl, VT, Mask0F,
-                        DAG.getNode(ISD::SRL, dl, VT, Mask0F,
-                                    DAG.getShiftAmountConstant(3, VT, dl)))
+          ? DAG.getNode(
+                ISD::AND, dl, VT, Mask0F,
+                DAG.getNode(ISD::SRL, dl, VT, Mask0F,
+                            DAG.getConstant(3, dl, VT, /*isTarget=*/false,
+                                            /*isOpaque=*/true)))
           : DAG.getConstant(Constant01, dl, VT);
   return DAG.getNode(ISD::SRL, dl, VT,
                      DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
@@ -8770,21 +8776,25 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   const APInt &Constant33 = APInt::getSplat(Len, APInt(8, 0x33));
   SDValue Mask33 =
       TTI.getIntImmCost(Constant33, VTTy, TargetTransformInfo::TCK_Latency) > 2
-          ? DAG.getNode(ISD::VP_XOR, dl, VT, Mask0F,
-                        DAG.getNode(ISD::VP_SHL, dl, VT, Mask0F,
-                                    DAG.getShiftAmountConstant(2, VT, dl), Mask,
-                                    VL),
-                        Mask, VL)
+          ? DAG.getNode(
+                ISD::VP_XOR, dl, VT, Mask0F,
+                DAG.getNode(ISD::VP_SHL, dl, VT, Mask0F,
+                            DAG.getConstant(2, dl, VT, /*isTarget=*/false,
+                                            /*isOpaque=*/true),
+                            Mask, VL),
+                Mask, VL)
           : DAG.getConstant(Constant33, dl, VT);
   // 0x55555555... = (0x33333333... ^ (0x33333333... << 1))
   const APInt &Constant55 = APInt::getSplat(Len, APInt(8, 0x55));
   SDValue Mask55 =
       TTI.getIntImmCost(Constant55, VTTy, TargetTransformInfo::TCK_Latency) > 2
-          ? DAG.getNode(ISD::VP_XOR, dl, VT, Mask33,
-                        DAG.getNode(ISD::VP_SHL, dl, VT, Mask33,
-                                    DAG.getShiftAmountConstant(1, VT, dl), Mask,
-                                    VL),
-                        Mask, VL)
+          ? DAG.getNode(
+                ISD::VP_XOR, dl, VT, Mask33,
+                DAG.getNode(ISD::VP_SHL, dl, VT, Mask33,
+                            DAG.getConstant(1, dl, VT, /*isTarget=*/false,
+                                            /*isOpaque=*/true),
+                            Mask, VL),
+                Mask, VL)
           : DAG.getConstant(Constant55, dl, VT);
 
   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5;
@@ -8818,11 +8828,13 @@ SDValue TargetLowering::expandVPCTPOP(SDNode *Node, SelectionDAG &DAG) const {
   const APInt &Constant01 = APInt::getSplat(Len, APInt(8, 0x01));
   SDValue Mask01 =
       TTI.getIntImmCost(Constant01, VTTy, TargetTransformInfo::TCK_Latency) > 2
-          ? DAG.getNode(ISD::VP_AND, dl, VT, Mask0F,
-                        DAG.getNode(ISD::VP_LSHR, dl, VT, Mask0F,
-                                    DAG.getShiftAmountConstant(3, VT, dl), Mask,
-                                    VL),
-                        Mask, VL)
+          ? DAG.getNode(
+                ISD::VP_AND, dl, VT, Mask0F,
+                DAG.getNode(ISD::VP_LSHR, dl, VT, Mask0F,
+                            DAG.getConstant(3, dl, VT, /*isTarget=*/false,
+                                            /*isOpaque=*/true),
+                            Mask, VL),
+                Mask, VL)
           : DAG.getConstant(Constant01, dl, VT);
   return DAG.getNode(ISD::VP_LSHR, dl, VT,
                      DAG.getNode(ISD::VP_MUL, dl, VT, Op, Mask01, Mask, VL),
diff --git a/llvm/test/CodeGen/PowerPC/optcmp.ll b/llvm/test/CodeGen/PowerPC/optcmp.ll
index bc265c646d471e..25aaf877e3039a 100644
--- a/llvm/test/CodeGen/PowerPC/optcmp.ll
+++ b/llvm/test/CodeGen/PowerPC/optcmp.ll
@@ -280,29 +280,27 @@ declare i64 @llvm.ctpop.i64(i64);
 define signext i64 @fooct(i64 signext %a, i64 signext %b, ptr nocapture %c) #0 {
 ; CHECK-LABEL: fooct:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lis 6, 21845
+; CHECK-NEXT:    lis 6, 3855
 ; CHECK-NEXT:    sub 7, 3, 4
-; CHECK-NEXT:    ori 6, 6, 21845
-; CHECK-NEXT:    lis 9, 13107
-; CHECK-NEXT:    rotldi 8, 7, 63
+; CHECK-NEXT:    ori 6, 6, 3855
 ; CHECK-NEXT:    rldimi 6, 6, 32, 0
-; CHECK-NEXT:    and 6, 8, 6
-; CHECK-NEXT:    ori 8, 9, 13107
-; CHECK-NEXT:    sub 6, 7, 6
-; CHECK-NEXT:    rldimi 8, 8, 32, 0
+; CHECK-NEXT:    rldicl 10, 7, 63, 1
+; CHECK-NEXT:    sldi 8, 6, 2
+; CHECK-NEXT:    xor 8, 8, 6
+; CHECK-NEXT:    sldi 9, 8, 1
+; CHECK-NEXT:    xor 9, 8, 9
+; CHECK-NEXT:    and 9, 10, 9
+; CHECK-NEXT:    sub 7, 7, 9
+; CHECK-NEXT:    rldicl 9, 7, 62, 2
+; CHECK-NEXT:    and 7, 7, 8
+; CHECK-NEXT:    and 8, 9, 8
 ; CHECK-NEXT:    lis 9, 257
-; CHECK-NEXT:    rotldi 7, 6, 62
-; CHECK-NEXT:    and 6, 6, 8
+; CHECK-NEXT:    add 7, 7, 8
 ; CHECK-NEXT:    ori 9, 9, 257
-; CHECK-NEXT:    and 7, 7, 8
-; CHECK-NEXT:    lis 8, 3855
-; CHECK-NEXT:    add 6, 6, 7
-; CHECK-NEXT:    ori 7, 8, 3855
-; CHECK-NEXT:    rldicl 8, 6, 60, 4
-; CHECK-NEXT:    rldimi 7, 7, 32, 0
+; CHECK-NEXT:    rldicl 8, 7, 60, 4
 ; CHECK-NEXT:    rldimi 9, 9, 32, 0
-; CHECK-NEXT:    add 6, 6, 8
-; CHECK-NEXT:    and 6, 6, 7
+; CHECK-NEXT:    add 7, 7, 8
+; CHECK-NEXT:    and 6, 7, 6
 ; CHECK-NEXT:    mulld 6, 6, 9
 ; CHECK-NEXT:    rldicl. 6, 6, 8, 56
 ; CHECK-NEXT:    iselgt 3, 3, 4
@@ -311,29 +309,27 @@ define signext i64 @fooct(i64 signext %a, i64 signext %b, ptr nocapture %c) #0 {
 ;
 ; CHECK-NO-ISEL-LABEL: fooct:
 ; CHECK-NO-ISEL:       # %bb.0: # %entry
-; CHECK-NO-ISEL-NEXT:    lis 6, 21845
+; CHECK-NO-ISEL-NEXT:    lis 6, 3855
 ; CHECK-NO-ISEL-NEXT:    sub 7, 3, 4
-; CHECK-NO-ISEL-NEXT:    ori 6, 6, 21845
-; CHECK-NO-ISEL-NEXT:    lis 9, 13107
-; CHECK-NO-ISEL-NEXT:    rotldi 8, 7, 63
+; CHECK-NO-ISEL-NEXT:    ori 6, 6, 3855
 ; CHECK-NO-ISEL-NEXT:    rldimi 6, 6, 32, 0
-; CHECK-NO-ISEL-NEXT:    and 6, 8, 6
-; CHECK-NO-ISEL-NEXT:    ori 8, 9, 13107
-; CHECK-NO-ISEL-NEXT:    sub 6, 7, 6
-; CHECK-NO-ISEL-NEXT:    rldimi 8, 8, 32, 0
+; CHECK-NO-ISEL-NEXT:    rldicl 10, 7, 63, 1
+; CHECK-NO-ISEL-NEXT:    sldi 8, 6, 2
+; CHECK-NO-ISEL-NEXT:    xor 8, 8, 6
+; CHECK-NO-ISEL-NEXT:    sldi 9, 8, 1
+; CHECK-NO-ISEL-NEXT:    xor 9, 8, 9
+; CHECK-NO-ISEL-NEXT:    and 9, 10, 9
+; CHECK-NO-ISEL-NEXT:    sub 7, 7, 9
+; CHECK-NO-ISEL-NEXT:    rldicl 9, 7, 62, 2
+; CHECK-NO-ISEL-NEXT:    and 7, 7, 8
+; CHECK-NO-ISEL-NEXT:    and 8, 9, 8
 ; CHECK-NO-ISEL-NEXT:    lis 9, 257
-; CHECK-NO-ISEL-NEXT:    rotldi 7, 6, 62
-; CHECK-NO-ISEL-NEXT:    and 6, 6, 8
+; CHECK-NO-ISEL-NEXT:    add 7, 7, 8
 ; CHECK-NO-ISEL-NEXT:    ori 9, 9, 257
-; CHECK-NO-ISEL-NEXT:    and 7, 7, 8
-; CHECK-NO-ISEL-NEXT:    lis 8, 3855
-; CHECK-NO-ISEL-NEXT:    add 6, 6, 7
-; CHECK-NO-ISEL-NEXT:    ori 7, 8, 3855
-; CHECK-NO-ISEL-NEXT:    rldicl 8, 6, 60, 4
-; CHECK-NO-ISEL-NEXT:    rldimi 7, 7, 32, 0
+; CHECK-NO-ISEL-NEXT:    rldicl 8, 7, 60, 4
 ; CHECK-NO-ISEL-NEXT:    rldimi 9, 9, 32, 0
-; CHECK-NO-ISEL-NEXT:    add 6, 6, 8
-; CHECK-NO-ISEL-NEXT:    and 6, 6, 7
+; CHECK-NO-ISEL-NEXT:    add 7, 7, 8
+; CHECK-NO-ISEL-NEXT:    and 6, 7, 6
 ; CHECK-NO-ISEL-NEXT:    mulld 6, 6, 9
 ; CHECK-NO-ISEL-NEXT:    rldicl. 6, 6, 8, 56
 ; CHECK-NO-ISEL-NEXT:    bc 12, 1, .LBB10_2
diff --git a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll
index fccf671e4c197a..1dd2b1c366a722 100644
--- a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll
+++ b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll
@@ -191,26 +191,25 @@ define i64 @zpop_i32_i64(i32 %x) {
 ;
 ; SLOW-LABEL: zpop_i32_i64:
 ; SLOW:       # %bb.0:
-; SLOW-NEXT:    rlwinm 5, 3, 31, 1, 0
-; SLOW-NEXT:    lis 4, 13107
-; SLOW-NEXT:    andis. 6, 5, 21845
-; SLOW-NEXT:    andi. 5, 5, 21845
-; SLOW-NEXT:    ori 4, 4, 13107
-; SLOW-NEXT:    or 5, 5, 6
-; SLOW-NEXT:    clrldi 3, 3, 32
-; SLOW-NEXT:    rldimi 4, 4, 32, 0
-; SLOW-NEXT:    sub 3, 3, 5
-; SLOW-NEXT:    and 5, 3, 4
-; SLOW-NEXT:    rotldi 3, 3, 62
-; SLOW-NEXT:    and 3, 3, 4
-; SLOW-NEXT:    add 3, 5, 3
 ; SLOW-NEXT:    lis 4, 3855
-; SLOW-NEXT:    rldicl 5, 3, 60, 4
 ; SLOW-NEXT:    ori 4, 4, 3855
-; SLOW-NEXT:    add 3, 3, 5
-; SLOW-NEXT:    lis 5, 257
 ; SLOW-NEXT:    rldimi 4, 4, 32, 0
+; SLOW-NEXT:    clrldi 5, 3, 32
+; SLOW-NEXT:    rlwinm 3, 3, 31, 1, 31
+; SLOW-NEXT:    sldi 6, 4, 2
+; SLOW-NEXT:    xor 6, 6, 4
+; SLOW-NEXT:    sldi 7, 6, 1
+; SLOW-NEXT:    xor 7, 6, 7
+; SLOW-NEXT:    and 3, 3, 7
+; SLOW-NEXT:    sub 3, 5, 3
+; SLOW-NEXT:    and 5, 3, 6
+; SLOW-NEXT:    rldicl 3, 3, 62, 2
+; SLOW-NEXT:    and 3, 3, 6
+; SLOW-NEXT:    add 3, 5, 3
+; SLOW-NEXT:    lis 5, 257
+; SLOW-NEXT:    rldicl 6, 3, 60, 4
 ; SLOW-NEXT:    ori 5, 5, 257
+; SLOW-NEXT:    add 3, 3, 6
 ; SLOW-NEXT:    and 3, 3, 4
 ; SLOW-NEXT:    rldimi 5, 5, 32, 0
 ; SLOW-NEXT:    mulld 3, 3, 5
diff --git a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
index ff7f1fc9029813..c0555ec7baa9e3 100644
--- a/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
+++ b/llvm/test/CodeGen/PowerPC/vector-popcnt-128-ult-ugt.ll
@@ -12085,41 +12085,39 @@ define <2 x i64> @ult_2_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_2_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 8, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 7, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
+; PWR5-NEXT:    lis 6, 257
+; PWR5-NEXT:    sldi 9, 5, 2
+; PWR5-NEXT:    ori 6, 6, 257
+; PWR5-NEXT:    xor 9, 9, 5
 ; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
+; PWR5-NEXT:    sldi 10, 9, 1
+; PWR5-NEXT:    xor 10, 9, 10
+; PWR5-NEXT:    and 8, 8, 10
+; PWR5-NEXT:    sub 3, 3, 8
+; PWR5-NEXT:    and 7, 7, 10
+; PWR5-NEXT:    and 8, 3, 9
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 7
+; PWR5-NEXT:    and 3, 3, 9
+; PWR5-NEXT:    and 7, 4, 9
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
 ; PWR5-NEXT:    add 3, 8, 3
-; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
-; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
-; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
-; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 9
+; PWR5-NEXT:    rldicl 8, 3, 60, 4
+; PWR5-NEXT:    add 4, 7, 4
+; PWR5-NEXT:    add 3, 3, 8
+; PWR5-NEXT:    rldicl 7, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 7
+; PWR5-NEXT:    mulld 3, 3, 6
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
-; PWR5-NEXT:    mulld 4, 4, 9
+; PWR5-NEXT:    mulld 4, 4, 6
 ; PWR5-NEXT:    li 5, 2
 ; PWR5-NEXT:    subfic 3, 3, 2
 ; PWR5-NEXT:    rldicl 4, 4, 8, 56
@@ -12130,41 +12128,39 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_2_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 8, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 7, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
+; PWR6-NEXT:    lis 6, 257
+; PWR6-NEXT:    sldi 9, 5, 2
+; PWR6-NEXT:    ori 6, 6, 257
+; PWR6-NEXT:    xor 9, 9, 5
 ; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
+; PWR6-NEXT:    sldi 10, 9, 1
+; PWR6-NEXT:    xor 10, 9, 10
+; PWR6-NEXT:    and 8, 8, 10
+; PWR6-NEXT:    sub 3, 3, 8
+; PWR6-NEXT:    and 7, 7, 10
+; PWR6-NEXT:    and 8, 3, 9
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 7
+; PWR6-NEXT:    and 3, 3, 9
+; PWR6-NEXT:    and 7, 4, 9
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
 ; PWR6-NEXT:    add 3, 8, 3
-; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
-; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
-; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
-; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 9
+; PWR6-NEXT:    rldicl 8, 3, 60, 4
+; PWR6-NEXT:    add 4, 7, 4
+; PWR6-NEXT:    add 3, 3, 8
+; PWR6-NEXT:    rldicl 7, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 7
+; PWR6-NEXT:    mulld 3, 3, 6
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
-; PWR6-NEXT:    mulld 4, 4, 9
+; PWR6-NEXT:    mulld 4, 4, 6
 ; PWR6-NEXT:    li 5, 2
 ; PWR6-NEXT:    subfic 3, 3, 2
 ; PWR6-NEXT:    rldicl 4, 4, 8, 56
@@ -12219,39 +12215,37 @@ define <2 x i64> @ugt_2_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_3_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 3
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -12264,39 +12258,37 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_3_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 3
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -12353,39 +12345,37 @@ define <2 x i64> @ult_3_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_3_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 3
@@ -12398,39 +12388,37 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_3_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 3
@@ -12487,39 +12475,37 @@ define <2 x i64> @ugt_3_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_4_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 4
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -12532,39 +12518,37 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_4_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 4
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -12621,39 +12605,37 @@ define <2 x i64> @ult_4_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_4_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 4
@@ -12666,39 +12648,37 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_4_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 4
@@ -12755,39 +12735,37 @@ define <2 x i64> @ugt_4_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_5_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 5
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -12800,39 +12778,37 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_5_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 5
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -12889,39 +12865,37 @@ define <2 x i64> @ult_5_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_5_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 5
@@ -12934,39 +12908,37 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_5_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 5
@@ -13023,39 +12995,37 @@ define <2 x i64> @ugt_5_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_6_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 6
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -13068,39 +13038,37 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_6_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 6
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -13157,39 +13125,37 @@ define <2 x i64> @ult_6_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_6_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 6
@@ -13202,39 +13168,37 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_6_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 6
@@ -13291,39 +13255,37 @@ define <2 x i64> @ugt_6_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_7_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 7
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -13336,39 +13298,37 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_7_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 7
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -13425,39 +13385,37 @@ define <2 x i64> @ult_7_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_7_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 7
@@ -13470,39 +13428,37 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_7_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 7
@@ -13559,39 +13515,37 @@ define <2 x i64> @ugt_7_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_8_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 8
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -13604,39 +13558,37 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_8_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 8
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -13693,39 +13645,37 @@ define <2 x i64> @ult_8_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_8_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 8
@@ -13738,39 +13688,37 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_8_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 8
@@ -13827,39 +13775,37 @@ define <2 x i64> @ugt_8_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_9_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 9
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -13872,39 +13818,37 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_9_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 9
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -13961,39 +13905,37 @@ define <2 x i64> @ult_9_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_9_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 9
@@ -14006,39 +13948,37 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_9_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 9
@@ -14095,39 +14035,37 @@ define <2 x i64> @ugt_9_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_10_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 10
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -14140,39 +14078,37 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_10_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 10
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -14229,39 +14165,37 @@ define <2 x i64> @ult_10_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_10_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 10
@@ -14274,39 +14208,37 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_10_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 10
@@ -14363,39 +14295,37 @@ define <2 x i64> @ugt_10_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_11_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 11
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -14408,39 +14338,37 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_11_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 11
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -14497,39 +14425,37 @@ define <2 x i64> @ult_11_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_11_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 11
@@ -14542,39 +14468,37 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_11_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 11
@@ -14631,39 +14555,37 @@ define <2 x i64> @ugt_11_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_12_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 12
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -14676,39 +14598,37 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_12_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 12
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -14765,39 +14685,37 @@ define <2 x i64> @ult_12_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_12_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 12
@@ -14810,39 +14728,37 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_12_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 12
@@ -14899,39 +14815,37 @@ define <2 x i64> @ugt_12_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_13_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 13
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -14944,39 +14858,37 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_13_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 13
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -15033,39 +14945,37 @@ define <2 x i64> @ult_13_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_13_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 13
@@ -15078,39 +14988,37 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_13_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 13
@@ -15167,39 +15075,37 @@ define <2 x i64> @ugt_13_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_14_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 14
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -15212,39 +15118,37 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_14_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 14
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -15301,39 +15205,37 @@ define <2 x i64> @ult_14_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_14_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 14
@@ -15346,39 +15248,37 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_14_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 14
@@ -15435,39 +15335,37 @@ define <2 x i64> @ugt_14_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_15_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 15
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -15480,39 +15378,37 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_15_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 15
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -15569,39 +15465,37 @@ define <2 x i64> @ult_15_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_15_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 15
@@ -15614,39 +15508,37 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_15_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 15
@@ -15703,39 +15595,37 @@ define <2 x i64> @ugt_15_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_16_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 16
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -15748,39 +15638,37 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_16_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 16
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -15837,39 +15725,37 @@ define <2 x i64> @ult_16_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_16_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 16
@@ -15882,39 +15768,37 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_16_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 16
@@ -15971,39 +15855,37 @@ define <2 x i64> @ugt_16_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_17_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 17
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -16016,39 +15898,37 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_17_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 17
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -16105,39 +15985,37 @@ define <2 x i64> @ult_17_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_17_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 17
@@ -16150,39 +16028,37 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_17_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 17
@@ -16239,39 +16115,37 @@ define <2 x i64> @ugt_17_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_18_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 18
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -16284,39 +16158,37 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_18_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 18
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -16373,39 +16245,37 @@ define <2 x i64> @ult_18_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_18_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 18
@@ -16418,39 +16288,37 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_18_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 18
@@ -16507,39 +16375,37 @@ define <2 x i64> @ugt_18_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_19_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 19
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -16552,39 +16418,37 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_19_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 19
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -16641,39 +16505,37 @@ define <2 x i64> @ult_19_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_19_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 19
@@ -16686,39 +16548,37 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_19_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 19
@@ -16775,39 +16635,37 @@ define <2 x i64> @ugt_19_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_20_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 20
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -16820,39 +16678,37 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_20_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 20
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -16909,39 +16765,37 @@ define <2 x i64> @ult_20_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_20_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 20
@@ -16954,39 +16808,37 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_20_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 20
@@ -17043,39 +16895,37 @@ define <2 x i64> @ugt_20_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_21_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 21
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -17088,39 +16938,37 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_21_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 21
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -17177,39 +17025,37 @@ define <2 x i64> @ult_21_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_21_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 21
@@ -17222,39 +17068,37 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_21_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 21
@@ -17311,39 +17155,37 @@ define <2 x i64> @ugt_21_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_22_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 22
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -17356,39 +17198,37 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_22_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 22
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -17445,39 +17285,37 @@ define <2 x i64> @ult_22_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_22_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 22
@@ -17490,39 +17328,37 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_22_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 22
@@ -17579,39 +17415,37 @@ define <2 x i64> @ugt_22_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_23_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 23
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -17624,39 +17458,37 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_23_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 23
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -17713,39 +17545,37 @@ define <2 x i64> @ult_23_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_23_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 23
@@ -17758,39 +17588,37 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_23_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 23
@@ -17847,39 +17675,37 @@ define <2 x i64> @ugt_23_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_24_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 24
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -17892,39 +17718,37 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_24_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 24
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -17981,39 +17805,37 @@ define <2 x i64> @ult_24_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_24_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 24
@@ -18026,39 +17848,37 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_24_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 24
@@ -18115,39 +17935,37 @@ define <2 x i64> @ugt_24_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_25_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 25
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -18160,39 +17978,37 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_25_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 25
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -18249,39 +18065,37 @@ define <2 x i64> @ult_25_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_25_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 25
@@ -18294,39 +18108,37 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_25_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 25
@@ -18383,39 +18195,37 @@ define <2 x i64> @ugt_25_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_26_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 26
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -18428,39 +18238,37 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_26_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 26
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -18517,39 +18325,37 @@ define <2 x i64> @ult_26_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_26_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 26
@@ -18562,39 +18368,37 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_26_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 26
@@ -18651,39 +18455,37 @@ define <2 x i64> @ugt_26_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_27_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 27
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -18696,39 +18498,37 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_27_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 27
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -18785,39 +18585,37 @@ define <2 x i64> @ult_27_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_27_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 27
@@ -18830,39 +18628,37 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_27_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 27
@@ -18919,39 +18715,37 @@ define <2 x i64> @ugt_27_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_28_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 28
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -18964,39 +18758,37 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_28_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 28
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -19053,39 +18845,37 @@ define <2 x i64> @ult_28_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_28_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 28
@@ -19098,39 +18888,37 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_28_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 28
@@ -19187,39 +18975,37 @@ define <2 x i64> @ugt_28_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_29_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 29
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -19232,39 +19018,37 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_29_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 29
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -19321,39 +19105,37 @@ define <2 x i64> @ult_29_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_29_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 29
@@ -19366,39 +19148,37 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_29_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 29
@@ -19455,39 +19235,37 @@ define <2 x i64> @ugt_29_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_30_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 30
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -19500,39 +19278,37 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_30_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 30
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -19589,39 +19365,37 @@ define <2 x i64> @ult_30_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_30_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 30
@@ -19634,39 +19408,37 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_30_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 30
@@ -19723,39 +19495,37 @@ define <2 x i64> @ugt_30_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_31_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 31
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -19768,39 +19538,37 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_31_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 31
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -19857,39 +19625,37 @@ define <2 x i64> @ult_31_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_31_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 31
@@ -19902,39 +19668,37 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_31_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 31
@@ -19991,39 +19755,37 @@ define <2 x i64> @ugt_31_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_32_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 32
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -20036,39 +19798,37 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_32_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 32
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -20125,39 +19885,37 @@ define <2 x i64> @ult_32_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_32_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 32
@@ -20170,39 +19928,37 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_32_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 32
@@ -20259,39 +20015,37 @@ define <2 x i64> @ugt_32_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_33_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 33
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -20304,39 +20058,37 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_33_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 33
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -20393,39 +20145,37 @@ define <2 x i64> @ult_33_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_33_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 33
@@ -20438,39 +20188,37 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_33_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 33
@@ -20527,39 +20275,37 @@ define <2 x i64> @ugt_33_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_34_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 34
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -20572,39 +20318,37 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_34_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 34
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -20661,39 +20405,37 @@ define <2 x i64> @ult_34_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_34_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 34
@@ -20706,39 +20448,37 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_34_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 34
@@ -20795,39 +20535,37 @@ define <2 x i64> @ugt_34_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_35_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 35
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -20840,39 +20578,37 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_35_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 35
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -20929,39 +20665,37 @@ define <2 x i64> @ult_35_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_35_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 35
@@ -20974,39 +20708,37 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_35_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 35
@@ -21063,39 +20795,37 @@ define <2 x i64> @ugt_35_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_36_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 36
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -21108,39 +20838,37 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_36_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 36
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -21197,39 +20925,37 @@ define <2 x i64> @ult_36_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_36_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 36
@@ -21242,39 +20968,37 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_36_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 36
@@ -21331,39 +21055,37 @@ define <2 x i64> @ugt_36_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_37_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 37
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -21376,39 +21098,37 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_37_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 37
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -21465,39 +21185,37 @@ define <2 x i64> @ult_37_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_37_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 37
@@ -21510,39 +21228,37 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_37_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 37
@@ -21599,39 +21315,37 @@ define <2 x i64> @ugt_37_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_38_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 38
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -21644,39 +21358,37 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_38_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 38
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -21733,39 +21445,37 @@ define <2 x i64> @ult_38_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_38_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 38
@@ -21778,39 +21488,37 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_38_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 38
@@ -21867,39 +21575,37 @@ define <2 x i64> @ugt_38_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_39_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 39
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -21912,39 +21618,37 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_39_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 39
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -22001,39 +21705,37 @@ define <2 x i64> @ult_39_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_39_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 39
@@ -22046,39 +21748,37 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_39_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 39
@@ -22135,39 +21835,37 @@ define <2 x i64> @ugt_39_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_40_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 40
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -22180,39 +21878,37 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_40_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 40
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -22269,39 +21965,37 @@ define <2 x i64> @ult_40_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_40_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 40
@@ -22314,39 +22008,37 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_40_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 40
@@ -22403,39 +22095,37 @@ define <2 x i64> @ugt_40_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_41_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 41
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -22448,39 +22138,37 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_41_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 41
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -22537,39 +22225,37 @@ define <2 x i64> @ult_41_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_41_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 41
@@ -22582,39 +22268,37 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_41_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 41
@@ -22671,39 +22355,37 @@ define <2 x i64> @ugt_41_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_42_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 42
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -22716,39 +22398,37 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_42_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 42
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -22805,39 +22485,37 @@ define <2 x i64> @ult_42_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_42_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 42
@@ -22850,39 +22528,37 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_42_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 42
@@ -22939,39 +22615,37 @@ define <2 x i64> @ugt_42_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_43_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 43
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -22984,39 +22658,37 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_43_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 43
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -23073,39 +22745,37 @@ define <2 x i64> @ult_43_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_43_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 43
@@ -23118,39 +22788,37 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_43_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 43
@@ -23207,39 +22875,37 @@ define <2 x i64> @ugt_43_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_44_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 44
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -23252,39 +22918,37 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_44_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 44
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -23341,39 +23005,37 @@ define <2 x i64> @ult_44_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_44_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 44
@@ -23386,39 +23048,37 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_44_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 44
@@ -23475,39 +23135,37 @@ define <2 x i64> @ugt_44_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_45_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 45
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -23520,39 +23178,37 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_45_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 45
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -23609,39 +23265,37 @@ define <2 x i64> @ult_45_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_45_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 45
@@ -23654,39 +23308,37 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_45_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 45
@@ -23743,39 +23395,37 @@ define <2 x i64> @ugt_45_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_46_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 46
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -23788,39 +23438,37 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_46_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 46
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -23877,39 +23525,37 @@ define <2 x i64> @ult_46_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_46_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 46
@@ -23922,39 +23568,37 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_46_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 46
@@ -24011,39 +23655,37 @@ define <2 x i64> @ugt_46_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_47_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 47
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -24056,39 +23698,37 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_47_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 47
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -24145,39 +23785,37 @@ define <2 x i64> @ult_47_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_47_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 47
@@ -24190,39 +23828,37 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_47_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 47
@@ -24279,39 +23915,37 @@ define <2 x i64> @ugt_47_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_48_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 48
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -24324,39 +23958,37 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_48_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 48
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -24413,39 +24045,37 @@ define <2 x i64> @ult_48_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_48_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 48
@@ -24458,39 +24088,37 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_48_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 48
@@ -24547,39 +24175,37 @@ define <2 x i64> @ugt_48_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_49_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 49
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -24592,39 +24218,37 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_49_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 49
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -24681,39 +24305,37 @@ define <2 x i64> @ult_49_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_49_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 49
@@ -24726,39 +24348,37 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_49_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 49
@@ -24815,39 +24435,37 @@ define <2 x i64> @ugt_49_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_50_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 50
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -24860,39 +24478,37 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_50_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 50
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -24949,39 +24565,37 @@ define <2 x i64> @ult_50_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_50_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 50
@@ -24994,39 +24608,37 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_50_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 50
@@ -25083,39 +24695,37 @@ define <2 x i64> @ugt_50_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_51_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 51
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -25128,39 +24738,37 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_51_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 51
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -25217,39 +24825,37 @@ define <2 x i64> @ult_51_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_51_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 51
@@ -25262,39 +24868,37 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_51_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 51
@@ -25351,39 +24955,37 @@ define <2 x i64> @ugt_51_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_52_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 52
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -25396,39 +24998,37 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_52_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 52
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -25485,39 +25085,37 @@ define <2 x i64> @ult_52_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_52_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 52
@@ -25530,39 +25128,37 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_52_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 52
@@ -25619,39 +25215,37 @@ define <2 x i64> @ugt_52_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_53_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 53
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -25664,39 +25258,37 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_53_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 53
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -25753,39 +25345,37 @@ define <2 x i64> @ult_53_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_53_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 53
@@ -25798,39 +25388,37 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_53_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 53
@@ -25887,39 +25475,37 @@ define <2 x i64> @ugt_53_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_54_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 54
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -25932,39 +25518,37 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_54_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 54
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -26021,39 +25605,37 @@ define <2 x i64> @ult_54_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_54_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 54
@@ -26066,39 +25648,37 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_54_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 54
@@ -26155,39 +25735,37 @@ define <2 x i64> @ugt_54_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_55_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 55
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -26200,39 +25778,37 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_55_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 55
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -26289,39 +25865,37 @@ define <2 x i64> @ult_55_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_55_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 55
@@ -26334,39 +25908,37 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_55_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 55
@@ -26423,39 +25995,37 @@ define <2 x i64> @ugt_55_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_56_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -26468,39 +26038,37 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_56_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -26557,39 +26125,37 @@ define <2 x i64> @ult_56_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_56_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 56
@@ -26602,39 +26168,37 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_56_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 56
@@ -26691,39 +26255,37 @@ define <2 x i64> @ugt_56_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_57_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 57
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -26736,39 +26298,37 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_57_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 57
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -26825,39 +26385,37 @@ define <2 x i64> @ult_57_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_57_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 57
@@ -26870,39 +26428,37 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_57_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 57
@@ -26959,39 +26515,37 @@ define <2 x i64> @ugt_57_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_58_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 58
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -27004,39 +26558,37 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_58_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 58
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -27093,39 +26645,37 @@ define <2 x i64> @ult_58_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_58_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 58
@@ -27138,39 +26688,37 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_58_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 58
@@ -27227,39 +26775,37 @@ define <2 x i64> @ugt_58_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_59_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 59
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -27272,39 +26818,37 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_59_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 59
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -27361,39 +26905,37 @@ define <2 x i64> @ult_59_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_59_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 59
@@ -27406,39 +26948,37 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_59_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 59
@@ -27495,39 +27035,37 @@ define <2 x i64> @ugt_59_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_60_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 60
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -27540,39 +27078,37 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_60_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 60
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -27629,39 +27165,37 @@ define <2 x i64> @ult_60_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_60_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 60
@@ -27674,39 +27208,37 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_60_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 60
@@ -27763,39 +27295,37 @@ define <2 x i64> @ugt_60_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_61_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 61
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -27808,39 +27338,37 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_61_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 61
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -27897,39 +27425,37 @@ define <2 x i64> @ult_61_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_61_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 61
@@ -27942,39 +27468,37 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_61_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 61
@@ -28031,39 +27555,37 @@ define <2 x i64> @ugt_61_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_62_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 62
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -28076,39 +27598,37 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_62_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 62
 ; PWR6-NEXT:    mulld 4, 4, 9
@@ -28165,39 +27685,37 @@ define <2 x i64> @ult_62_v2i64(<2 x i64> %0) {
 define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ugt_62_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    mulld 4, 4, 9
 ; PWR5-NEXT:    li 5, 62
@@ -28210,39 +27728,37 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ugt_62_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    mulld 4, 4, 9
 ; PWR6-NEXT:    li 5, 62
@@ -28299,39 +27815,37 @@ define <2 x i64> @ugt_62_v2i64(<2 x i64> %0) {
 define <2 x i64> @ult_63_v2i64(<2 x i64> %0) {
 ; PWR5-LABEL: ult_63_v2i64:
 ; PWR5:       # %bb.0:
-; PWR5-NEXT:    lis 5, 21845
-; PWR5-NEXT:    lis 6, 13107
-; PWR5-NEXT:    ori 5, 5, 21845
-; PWR5-NEXT:    rotldi 8, 4, 63
-; PWR5-NEXT:    rotldi 9, 3, 63
+; PWR5-NEXT:    lis 5, 3855
+; PWR5-NEXT:    rldicl 7, 3, 63, 1
+; PWR5-NEXT:    ori 5, 5, 3855
+; PWR5-NEXT:    rldicl 6, 4, 63, 1
 ; PWR5-NEXT:    rldimi 5, 5, 32, 0
-; PWR5-NEXT:    and 8, 8, 5
-; PWR5-NEXT:    and 5, 9, 5
-; PWR5-NEXT:    ori 6, 6, 13107
-; PWR5-NEXT:    sub 3, 3, 5
-; PWR5-NEXT:    rldimi 6, 6, 32, 0
-; PWR5-NEXT:    sub 4, 4, 8
-; PWR5-NEXT:    and 8, 3, 6
-; PWR5-NEXT:    rotldi 3, 3, 62
-; PWR5-NEXT:    and 3, 3, 6
-; PWR5-NEXT:    lis 7, 3855
-; PWR5-NEXT:    and 5, 4, 6
-; PWR5-NEXT:    rotldi 4, 4, 62
-; PWR5-NEXT:    add 3, 8, 3
+; PWR5-NEXT:    sldi 8, 5, 2
+; PWR5-NEXT:    xor 8, 8, 5
+; PWR5-NEXT:    sldi 9, 8, 1
+; PWR5-NEXT:    xor 9, 8, 9
+; PWR5-NEXT:    and 7, 7, 9
+; PWR5-NEXT:    sub 3, 3, 7
+; PWR5-NEXT:    and 6, 6, 9
+; PWR5-NEXT:    and 7, 3, 8
+; PWR5-NEXT:    rldicl 3, 3, 62, 2
+; PWR5-NEXT:    sub 4, 4, 6
+; PWR5-NEXT:    and 3, 3, 8
+; PWR5-NEXT:    and 6, 4, 8
+; PWR5-NEXT:    rldicl 4, 4, 62, 2
+; PWR5-NEXT:    add 3, 7, 3
 ; PWR5-NEXT:    lis 9, 257
-; PWR5-NEXT:    ori 7, 7, 3855
-; PWR5-NEXT:    and 4, 4, 6
-; PWR5-NEXT:    rldicl 6, 3, 60, 4
+; PWR5-NEXT:    and 4, 4, 8
+; PWR5-NEXT:    rldicl 7, 3, 60, 4
 ; PWR5-NEXT:    ori 9, 9, 257
-; PWR5-NEXT:    rldimi 7, 7, 32, 0
-; PWR5-NEXT:    add 4, 5, 4
-; PWR5-NEXT:    add 3, 3, 6
+; PWR5-NEXT:    add 4, 6, 4
+; PWR5-NEXT:    add 3, 3, 7
 ; PWR5-NEXT:    rldimi 9, 9, 32, 0
-; PWR5-NEXT:    rldicl 5, 4, 60, 4
-; PWR5-NEXT:    and 3, 3, 7
-; PWR5-NEXT:    add 4, 4, 5
+; PWR5-NEXT:    rldicl 6, 4, 60, 4
+; PWR5-NEXT:    and 3, 3, 5
+; PWR5-NEXT:    add 4, 4, 6
 ; PWR5-NEXT:    mulld 3, 3, 9
-; PWR5-NEXT:    and 4, 4, 7
+; PWR5-NEXT:    and 4, 4, 5
 ; PWR5-NEXT:    rldicl 3, 3, 8, 56
 ; PWR5-NEXT:    li 5, 63
 ; PWR5-NEXT:    mulld 4, 4, 9
@@ -28344,39 +27858,37 @@ define <2 x i64> @ult_63_v2i64(<2 x i64> %0) {
 ;
 ; PWR6-LABEL: ult_63_v2i64:
 ; PWR6:       # %bb.0:
-; PWR6-NEXT:    lis 5, 21845
-; PWR6-NEXT:    lis 6, 13107
-; PWR6-NEXT:    ori 5, 5, 21845
-; PWR6-NEXT:    rotldi 8, 4, 63
-; PWR6-NEXT:    rotldi 9, 3, 63
+; PWR6-NEXT:    lis 5, 3855
+; PWR6-NEXT:    rldicl 7, 3, 63, 1
+; PWR6-NEXT:    ori 5, 5, 3855
+; PWR6-NEXT:    rldicl 6, 4, 63, 1
 ; PWR6-NEXT:    rldimi 5, 5, 32, 0
-; PWR6-NEXT:    and 8, 8, 5
-; PWR6-NEXT:    and 5, 9, 5
-; PWR6-NEXT:    ori 6, 6, 13107
-; PWR6-NEXT:    sub 3, 3, 5
-; PWR6-NEXT:    rldimi 6, 6, 32, 0
-; PWR6-NEXT:    sub 4, 4, 8
-; PWR6-NEXT:    and 8, 3, 6
-; PWR6-NEXT:    rotldi 3, 3, 62
-; PWR6-NEXT:    and 3, 3, 6
-; PWR6-NEXT:    lis 7, 3855
-; PWR6-NEXT:    and 5, 4, 6
-; PWR6-NEXT:    rotldi 4, 4, 62
-; PWR6-NEXT:    add 3, 8, 3
+; PWR6-NEXT:    sldi 8, 5, 2
+; PWR6-NEXT:    xor 8, 8, 5
+; PWR6-NEXT:    sldi 9, 8, 1
+; PWR6-NEXT:    xor 9, 8, 9
+; PWR6-NEXT:    and 7, 7, 9
+; PWR6-NEXT:    sub 3, 3, 7
+; PWR6-NEXT:    and 6, 6, 9
+; PWR6-NEXT:    and 7, 3, 8
+; PWR6-NEXT:    rldicl 3, 3, 62, 2
+; PWR6-NEXT:    sub 4, 4, 6
+; PWR6-NEXT:    and 3, 3, 8
+; PWR6-NEXT:    and 6, 4, 8
+; PWR6-NEXT:    rldicl 4, 4, 62, 2
+; PWR6-NEXT:    add 3, 7, 3
 ; PWR6-NEXT:    lis 9, 257
-; PWR6-NEXT:    ori 7, 7, 3855
-; PWR6-NEXT:    and 4, 4, 6
-; PWR6-NEXT:    rldicl 6, 3, 60, 4
+; PWR6-NEXT:    and 4, 4, 8
+; PWR6-NEXT:    rldicl 7, 3, 60, 4
 ; PWR6-NEXT:    ori 9, 9, 257
-; PWR6-NEXT:    rldimi 7, 7, 32, 0
-; PWR6-NEXT:    add 4, 5, 4
-; PWR6-NEXT:    add 3, 3, 6
+; PWR6-NEXT:    add 4, 6, 4
+; PWR6-NEXT:    add 3, 3, 7
 ; PWR6-NEXT:    rldimi 9, 9, 32, 0
-; PWR6-NEXT:    rldicl 5, 4, 60, 4
-; PWR6-NEXT:    and 3, 3, 7
-; PWR6-NEXT:    add 4, 4, 5
+; PWR6-NEXT:    rldicl 6, 4, 60, 4
+; PWR6-NEXT:    and 3, 3, 5
+; PWR6-NEXT:    add 4, 4, 6
 ; PWR6-NEXT:    mulld 3, 3, 9
-; PWR6-NEXT:    and 4, 4, 7
+; PWR6-NEXT:    and 4, 4, 5
 ; PWR6-NEXT:    rldicl 3, 3, 8, 56
 ; PWR6-NEXT:    li 5, 63
 ; PWR6-NEXT:    mulld 4, 4, 9
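
The rewritten PWR5/PWR6 sequences above only materialize the 0x0F0F...0F splat: the 0x3333...33 and 0x5555...55 masks are derived from it with a shift plus xor (sldi/xor), and the 0x0101...01 multiplier with a shift plus and, which is what replaces the extra lis/ori/rldimi triples. The RV64 check lines in the next file follow the same pattern with vsll.vi/vxor.vx and vsrl.vi/vand.vx. A minimal standalone C++ sketch of the identities involved, separate from the patch itself (the Mask0F name is illustrative, not taken from the diff):

// Sanity check for the constant-derivation identities used by the new
// expansion: only 0x0F0F...0F is materialized, the rest is computed.
#include <cstdint>

constexpr uint64_t Mask0F = 0x0F0F0F0F0F0F0F0FULL;

// sldi/xor (PPC), vsll.vi/vxor.vx (RVV): 0x33... = 0x0F... ^ (0x0F... << 2)
static_assert((Mask0F ^ (Mask0F << 2)) == 0x3333333333333333ULL, "");

// shift-left-by-1 (add) plus xor: 0x55... = 0x33... ^ (0x33... << 1)
constexpr uint64_t Mask33 = Mask0F ^ (Mask0F << 2);
static_assert((Mask33 ^ (Mask33 << 1)) == 0x5555555555555555ULL, "");

// srl/and (vsrl.vi/vand.vx): 0x0101...01 = (0x0F... >> 3) & 0x0F...
static_assert(((Mask0F >> 3) & Mask0F) == 0x0101010101010101ULL, "");

int main() { return 0; }
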
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
index 5e6844bef8d62e..198ff081dfc54e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz.ll
@@ -307,32 +307,27 @@ define void @ctlz_v2i64(ptr %x, ptr %y) nounwind {
 ; RV64I-NEXT:    vor.vv v8, v8, v9
 ; RV64I-NEXT:    vnot.v v8, v8
 ; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a1, 349525
-; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    slli a2, a1, 32
 ; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vmv.v.x v10, a1
+; RV64I-NEXT:    vsll.vi v11, v10, 2
+; RV64I-NEXT:    vxor.vx v11, v11, a1
+; RV64I-NEXT:    vadd.vv v12, v11, v11
+; RV64I-NEXT:    vxor.vv v12, v11, v12
+; RV64I-NEXT:    vand.vv v9, v9, v12
 ; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a1, 209715
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v9, v8, a1
+; RV64I-NEXT:    vand.vv v9, v8, v11
 ; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vand.vv v8, v8, v11
 ; RV64I-NEXT:    vadd.vv v8, v9, v8
 ; RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a1, 61681
-; RV64I-NEXT:    addiw a1, a1, -241
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
 ; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    vsrl.vi v9, v10, 3
+; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vmul.vv v8, v8, v9
 ; RV64I-NEXT:    li a1, 56
 ; RV64I-NEXT:    vsrl.vx v8, v8, a1
 ; RV64I-NEXT:    vse64.v v8, (a0)
@@ -707,32 +702,27 @@ define void @ctlz_v4i64(ptr %x, ptr %y) nounwind {
 ; RV64I-NEXT:    vor.vv v8, v8, v10
 ; RV64I-NEXT:    vnot.v v8, v8
 ; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a1, 349525
-; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    slli a2, a1, 32
 ; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vmv.v.x v12, a1
+; RV64I-NEXT:    vsll.vi v14, v12, 2
+; RV64I-NEXT:    vxor.vx v14, v14, a1
+; RV64I-NEXT:    vadd.vv v16, v14, v14
+; RV64I-NEXT:    vxor.vv v16, v14, v16
+; RV64I-NEXT:    vand.vv v10, v10, v16
 ; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a1, 209715
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v10, v8, a1
+; RV64I-NEXT:    vand.vv v10, v8, v14
 ; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vand.vv v8, v8, v14
 ; RV64I-NEXT:    vadd.vv v8, v10, v8
 ; RV64I-NEXT:    vsrl.vi v10, v8, 4
 ; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a1, 61681
-; RV64I-NEXT:    addiw a1, a1, -241
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
 ; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    vsrl.vi v10, v12, 3
+; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vmul.vv v8, v8, v10
 ; RV64I-NEXT:    li a1, 56
 ; RV64I-NEXT:    vsrl.vx v8, v8, a1
 ; RV64I-NEXT:    vse64.v v8, (a0)
@@ -1088,32 +1078,27 @@ define void @ctlz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; RV64I-NEXT:    vor.vv v8, v8, v9
 ; RV64I-NEXT:    vnot.v v8, v8
 ; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a1, 349525
-; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    slli a2, a1, 32
 ; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vmv.v.x v10, a1
+; RV64I-NEXT:    vsll.vi v11, v10, 2
+; RV64I-NEXT:    vxor.vx v11, v11, a1
+; RV64I-NEXT:    vadd.vv v12, v11, v11
+; RV64I-NEXT:    vxor.vv v12, v11, v12
+; RV64I-NEXT:    vand.vv v9, v9, v12
 ; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a1, 209715
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v9, v8, a1
+; RV64I-NEXT:    vand.vv v9, v8, v11
 ; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vand.vv v8, v8, v11
 ; RV64I-NEXT:    vadd.vv v8, v9, v8
 ; RV64I-NEXT:    vsrl.vi v9, v8, 4
 ; RV64I-NEXT:    vadd.vv v8, v8, v9
-; RV64I-NEXT:    lui a1, 61681
-; RV64I-NEXT:    addiw a1, a1, -241
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
 ; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    vsrl.vi v9, v10, 3
+; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vmul.vv v8, v8, v9
 ; RV64I-NEXT:    li a1, 56
 ; RV64I-NEXT:    vsrl.vx v8, v8, a1
 ; RV64I-NEXT:    vse64.v v8, (a0)
@@ -1464,32 +1449,27 @@ define void @ctlz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; RV64I-NEXT:    vor.vv v8, v8, v10
 ; RV64I-NEXT:    vnot.v v8, v8
 ; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a1, 349525
-; RV64I-NEXT:    addiw a1, a1, 1365
+; RV64I-NEXT:    lui a1, 61681
+; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    slli a2, a1, 32
 ; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vmv.v.x v12, a1
+; RV64I-NEXT:    vsll.vi v14, v12, 2
+; RV64I-NEXT:    vxor.vx v14, v14, a1
+; RV64I-NEXT:    vadd.vv v16, v14, v14
+; RV64I-NEXT:    vxor.vv v16, v14, v16
+; RV64I-NEXT:    vand.vv v10, v10, v16
 ; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a1, 209715
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v10, v8, a1
+; RV64I-NEXT:    vand.vv v10, v8, v14
 ; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a1
+; RV64I-NEXT:    vand.vv v8, v8, v14
 ; RV64I-NEXT:    vadd.vv v8, v10, v8
 ; RV64I-NEXT:    vsrl.vi v10, v8, 4
 ; RV64I-NEXT:    vadd.vv v8, v8, v10
-; RV64I-NEXT:    lui a1, 61681
-; RV64I-NEXT:    addiw a1, a1, -241
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
 ; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    vsrl.vi v10, v12, 3
+; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vmul.vv v8, v8, v10
 ; RV64I-NEXT:    li a1, 56
 ; RV64I-NEXT:    vsrl.vx v8, v8, a1
 ; RV64I-NEXT:    vse64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
index 909d347dfa0691..90c166f612af7f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll
@@ -160,33 +160,28 @@ define void @ctpop_v2i64(ptr %x, ptr %y) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    vsrl.vi v9, v8, 1
-; RV64-NEXT:    lui a1, 349525
-; RV64-NEXT:    addiw a1, a1, 1365
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    vand.vx v9, v9, a1
-; RV64-NEXT:    vsub.vv v8, v8, v9
-; RV64-NEXT:    lui a1, 209715
-; RV64-NEXT:    addiw a1, a1, 819
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    vand.vx v9, v8, a1
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vadd.vv v8, v9, v8
-; RV64-NEXT:    vsrl.vi v9, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v9
 ; RV64-NEXT:    lui a1, 61681
 ; RV64-NEXT:    addiw a1, a1, -241
 ; RV64-NEXT:    slli a2, a1, 32
 ; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vmv.v.x v9, a1
+; RV64-NEXT:    vsll.vi v10, v9, 2
+; RV64-NEXT:    vxor.vx v10, v10, a1
+; RV64-NEXT:    vadd.vv v11, v10, v10
+; RV64-NEXT:    vxor.vv v11, v10, v11
+; RV64-NEXT:    vsrl.vi v12, v8, 1
+; RV64-NEXT:    vand.vv v11, v12, v11
+; RV64-NEXT:    vsub.vv v8, v8, v11
+; RV64-NEXT:    vand.vv v11, v8, v10
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vv v8, v8, v10
+; RV64-NEXT:    vadd.vv v8, v11, v8
+; RV64-NEXT:    vsrl.vi v10, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v10
 ; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    lui a1, 4112
-; RV64-NEXT:    addiw a1, a1, 257
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    vmul.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v9, v9, 3
+; RV64-NEXT:    vand.vx v9, v9, a1
+; RV64-NEXT:    vmul.vv v8, v8, v9
 ; RV64-NEXT:    li a1, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a1
 ; RV64-NEXT:    vse64.v v8, (a0)
@@ -457,33 +452,28 @@ define void @ctpop_v4i64(ptr %x, ptr %y) {
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64-NEXT:    vle64.v v8, (a0)
-; RV64-NEXT:    vsrl.vi v10, v8, 1
-; RV64-NEXT:    lui a1, 349525
-; RV64-NEXT:    addiw a1, a1, 1365
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    vand.vx v10, v10, a1
-; RV64-NEXT:    vsub.vv v8, v8, v10
-; RV64-NEXT:    lui a1, 209715
-; RV64-NEXT:    addiw a1, a1, 819
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    vand.vx v10, v8, a1
-; RV64-NEXT:    vsrl.vi v8, v8, 2
-; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    vadd.vv v8, v10, v8
-; RV64-NEXT:    vsrl.vi v10, v8, 4
-; RV64-NEXT:    vadd.vv v8, v8, v10
 ; RV64-NEXT:    lui a1, 61681
 ; RV64-NEXT:    addiw a1, a1, -241
 ; RV64-NEXT:    slli a2, a1, 32
 ; RV64-NEXT:    add a1, a1, a2
+; RV64-NEXT:    vmv.v.x v10, a1
+; RV64-NEXT:    vsll.vi v12, v10, 2
+; RV64-NEXT:    vxor.vx v12, v12, a1
+; RV64-NEXT:    vadd.vv v14, v12, v12
+; RV64-NEXT:    vxor.vv v14, v12, v14
+; RV64-NEXT:    vsrl.vi v16, v8, 1
+; RV64-NEXT:    vand.vv v14, v16, v14
+; RV64-NEXT:    vsub.vv v8, v8, v14
+; RV64-NEXT:    vand.vv v14, v8, v12
+; RV64-NEXT:    vsrl.vi v8, v8, 2
+; RV64-NEXT:    vand.vv v8, v8, v12
+; RV64-NEXT:    vadd.vv v8, v14, v8
+; RV64-NEXT:    vsrl.vi v12, v8, 4
+; RV64-NEXT:    vadd.vv v8, v8, v12
 ; RV64-NEXT:    vand.vx v8, v8, a1
-; RV64-NEXT:    lui a1, 4112
-; RV64-NEXT:    addiw a1, a1, 257
-; RV64-NEXT:    slli a2, a1, 32
-; RV64-NEXT:    add a1, a1, a2
-; RV64-NEXT:    vmul.vx v8, v8, a1
+; RV64-NEXT:    vsrl.vi v10, v10, 3
+; RV64-NEXT:    vand.vx v10, v10, a1
+; RV64-NEXT:    vmul.vv v8, v8, v10
 ; RV64-NEXT:    li a1, 56
 ; RV64-NEXT:    vsrl.vx v8, v8, a1
 ; RV64-NEXT:    vse64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
index 23f09eb30cab22..b37b2039429164 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll
@@ -285,37 +285,32 @@ define void @cttz_v2i64(ptr %x, ptr %y) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64I-NEXT:    vle64.v v8, (a0)
-; RV64I-NEXT:    li a1, 1
-; RV64I-NEXT:    vsub.vx v9, v8, a1
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a1, 349525
-; RV64I-NEXT:    addiw a1, a1, 1365
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v9, v9, a1
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a1, 209715
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v9, v8, a1
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
 ; RV64I-NEXT:    lui a1, 61681
 ; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    slli a2, a1, 32
 ; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmv.v.x v9, a1
+; RV64I-NEXT:    vsll.vi v10, v9, 2
+; RV64I-NEXT:    vxor.vx v10, v10, a1
+; RV64I-NEXT:    vadd.vv v11, v10, v10
+; RV64I-NEXT:    vxor.vv v11, v10, v11
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    vsub.vx v12, v8, a2
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vand.vv v8, v8, v12
+; RV64I-NEXT:    vsrl.vi v12, v8, 1
+; RV64I-NEXT:    vand.vv v11, v12, v11
+; RV64I-NEXT:    vsub.vv v8, v8, v11
+; RV64I-NEXT:    vand.vv v11, v8, v10
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vv v8, v8, v10
+; RV64I-NEXT:    vadd.vv v8, v11, v8
+; RV64I-NEXT:    vsrl.vi v10, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v10
 ; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    vsrl.vi v9, v9, 3
+; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vmul.vv v8, v8, v9
 ; RV64I-NEXT:    li a1, 56
 ; RV64I-NEXT:    vsrl.vx v8, v8, a1
 ; RV64I-NEXT:    vse64.v v8, (a0)
@@ -677,37 +672,32 @@ define void @cttz_v4i64(ptr %x, ptr %y) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64I-NEXT:    vle64.v v8, (a0)
-; RV64I-NEXT:    li a1, 1
-; RV64I-NEXT:    vsub.vx v10, v8, a1
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a1, 349525
-; RV64I-NEXT:    addiw a1, a1, 1365
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v10, v10, a1
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a1, 209715
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v10, v8, a1
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
 ; RV64I-NEXT:    lui a1, 61681
 ; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    slli a2, a1, 32
 ; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmv.v.x v10, a1
+; RV64I-NEXT:    vsll.vi v12, v10, 2
+; RV64I-NEXT:    vxor.vx v12, v12, a1
+; RV64I-NEXT:    vadd.vv v14, v12, v12
+; RV64I-NEXT:    vxor.vv v14, v12, v14
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    vsub.vx v16, v8, a2
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vand.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 1
+; RV64I-NEXT:    vand.vv v14, v16, v14
+; RV64I-NEXT:    vsub.vv v8, v8, v14
+; RV64I-NEXT:    vand.vv v14, v8, v12
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vv v8, v8, v12
+; RV64I-NEXT:    vadd.vv v8, v14, v8
+; RV64I-NEXT:    vsrl.vi v12, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v12
 ; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    vsrl.vi v10, v10, 3
+; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vmul.vv v8, v8, v10
 ; RV64I-NEXT:    li a1, 56
 ; RV64I-NEXT:    vsrl.vx v8, v8, a1
 ; RV64I-NEXT:    vse64.v v8, (a0)
@@ -1046,37 +1036,32 @@ define void @cttz_zero_undef_v2i64(ptr %x, ptr %y) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    vsetivli zero, 2, e64, m1, ta, ma
 ; RV64I-NEXT:    vle64.v v8, (a0)
-; RV64I-NEXT:    li a1, 1
-; RV64I-NEXT:    vsub.vx v9, v8, a1
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v9
-; RV64I-NEXT:    vsrl.vi v9, v8, 1
-; RV64I-NEXT:    lui a1, 349525
-; RV64I-NEXT:    addiw a1, a1, 1365
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v9, v9, a1
-; RV64I-NEXT:    vsub.vv v8, v8, v9
-; RV64I-NEXT:    lui a1, 209715
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v9, v8, a1
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    vadd.vv v8, v9, v8
-; RV64I-NEXT:    vsrl.vi v9, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v9
 ; RV64I-NEXT:    lui a1, 61681
 ; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    slli a2, a1, 32
 ; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmv.v.x v9, a1
+; RV64I-NEXT:    vsll.vi v10, v9, 2
+; RV64I-NEXT:    vxor.vx v10, v10, a1
+; RV64I-NEXT:    vadd.vv v11, v10, v10
+; RV64I-NEXT:    vxor.vv v11, v10, v11
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    vsub.vx v12, v8, a2
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vand.vv v8, v8, v12
+; RV64I-NEXT:    vsrl.vi v12, v8, 1
+; RV64I-NEXT:    vand.vv v11, v12, v11
+; RV64I-NEXT:    vsub.vv v8, v8, v11
+; RV64I-NEXT:    vand.vv v11, v8, v10
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vv v8, v8, v10
+; RV64I-NEXT:    vadd.vv v8, v11, v8
+; RV64I-NEXT:    vsrl.vi v10, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v10
 ; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    vsrl.vi v9, v9, 3
+; RV64I-NEXT:    vand.vx v9, v9, a1
+; RV64I-NEXT:    vmul.vv v8, v8, v9
 ; RV64I-NEXT:    li a1, 56
 ; RV64I-NEXT:    vsrl.vx v8, v8, a1
 ; RV64I-NEXT:    vse64.v v8, (a0)
@@ -1408,37 +1393,32 @@ define void @cttz_zero_undef_v4i64(ptr %x, ptr %y) nounwind {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    vsetivli zero, 4, e64, m2, ta, ma
 ; RV64I-NEXT:    vle64.v v8, (a0)
-; RV64I-NEXT:    li a1, 1
-; RV64I-NEXT:    vsub.vx v10, v8, a1
-; RV64I-NEXT:    vnot.v v8, v8
-; RV64I-NEXT:    vand.vv v8, v8, v10
-; RV64I-NEXT:    vsrl.vi v10, v8, 1
-; RV64I-NEXT:    lui a1, 349525
-; RV64I-NEXT:    addiw a1, a1, 1365
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v10, v10, a1
-; RV64I-NEXT:    vsub.vv v8, v8, v10
-; RV64I-NEXT:    lui a1, 209715
-; RV64I-NEXT:    addiw a1, a1, 819
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vand.vx v10, v8, a1
-; RV64I-NEXT:    vsrl.vi v8, v8, 2
-; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    vadd.vv v8, v10, v8
-; RV64I-NEXT:    vsrl.vi v10, v8, 4
-; RV64I-NEXT:    vadd.vv v8, v8, v10
 ; RV64I-NEXT:    lui a1, 61681
 ; RV64I-NEXT:    addiw a1, a1, -241
 ; RV64I-NEXT:    slli a2, a1, 32
 ; RV64I-NEXT:    add a1, a1, a2
+; RV64I-NEXT:    vmv.v.x v10, a1
+; RV64I-NEXT:    vsll.vi v12, v10, 2
+; RV64I-NEXT:    vxor.vx v12, v12, a1
+; RV64I-NEXT:    vadd.vv v14, v12, v12
+; RV64I-NEXT:    vxor.vv v14, v12, v14
+; RV64I-NEXT:    li a2, 1
+; RV64I-NEXT:    vsub.vx v16, v8, a2
+; RV64I-NEXT:    vnot.v v8, v8
+; RV64I-NEXT:    vand.vv v8, v8, v16
+; RV64I-NEXT:    vsrl.vi v16, v8, 1
+; RV64I-NEXT:    vand.vv v14, v16, v14
+; RV64I-NEXT:    vsub.vv v8, v8, v14
+; RV64I-NEXT:    vand.vv v14, v8, v12
+; RV64I-NEXT:    vsrl.vi v8, v8, 2
+; RV64I-NEXT:    vand.vv v8, v8, v12
+; RV64I-NEXT:    vadd.vv v8, v14, v8
+; RV64I-NEXT:    vsrl.vi v12, v8, 4
+; RV64I-NEXT:    vadd.vv v8, v8, v12
 ; RV64I-NEXT:    vand.vx v8, v8, a1
-; RV64I-NEXT:    lui a1, 4112
-; RV64I-NEXT:    addiw a1, a1, 257
-; RV64I-NEXT:    slli a2, a1, 32
-; RV64I-NEXT:    add a1, a1, a2
-; RV64I-NEXT:    vmul.vx v8, v8, a1
+; RV64I-NEXT:    vsrl.vi v10, v10, 3
+; RV64I-NEXT:    vand.vx v10, v10, a1
+; RV64I-NEXT:    vmul.vv v8, v8, v10
 ; RV64I-NEXT:    li a1, 56
 ; RV64I-NEXT:    vsrl.vx v8, v8, a1
 ; RV64I-NEXT:    vse64.v v8, (a0)


