[llvm] c04ff58 - [AArch64] Combine setcc (iN (bitcast (vNi1 X))) with vecreduce_or

Cullen Rhodes via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 25 05:14:50 PDT 2022


Author: Cullen Rhodes
Date: 2022-07-25T12:14:33Z
New Revision: c04ff587dce29063525ce67e1af11c541f98412d

URL: https://github.com/llvm/llvm-project/commit/c04ff587dce29063525ce67e1af11c541f98412d
DIFF: https://github.com/llvm/llvm-project/commit/c04ff587dce29063525ce67e1af11c541f98412d.diff

LOG: [AArch64] Combine setcc (iN (bitcast (vNi1 X))) with vecreduce_or

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D130163

Added: 
    

Modified: 
    llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/test/CodeGen/AArch64/dag-combine-setcc.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 8a80875f55772..94baadae242fa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -18193,7 +18193,9 @@ static SDValue tryToWidenSetCCOperands(SDNode *Op, SelectionDAG &DAG) {
                      Op0ExtV, Op1ExtV, Op->getOperand(2));
 }
 
-static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSETCCCombine(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   SelectionDAG &DAG) {
   assert(N->getOpcode() == ISD::SETCC && "Unexpected opcode!");
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
@@ -18236,6 +18238,21 @@ static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG) {
     }
   }
 
+  // setcc (iN (bitcast (vNi1 X))), 0, (eq|ne)
+  //   ==> setcc (iN (zext (i1 (vecreduce_or (vNi1 X))))), 0, (eq|ne)
+  if (DCI.isBeforeLegalize() && VT.isScalarInteger() &&
+      (Cond == ISD::SETEQ || Cond == ISD::SETNE) && isNullConstant(RHS) &&
+      LHS->getOpcode() == ISD::BITCAST) {
+    EVT ToVT = LHS->getValueType(0);
+    EVT FromVT = LHS->getOperand(0).getValueType();
+    if (FromVT.isFixedLengthVector() &&
+        FromVT.getVectorElementType() == MVT::i1) {
+      LHS = DAG.getNode(ISD::VECREDUCE_OR, DL, MVT::i1, LHS->getOperand(0));
+      LHS = DAG.getNode(ISD::ZERO_EXTEND, DL, ToVT, LHS);
+      return DAG.getSetCC(DL, VT, LHS, RHS, Cond);
+    }
+  }
+
   return SDValue();
 }
 
@@ -19378,7 +19395,7 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::VSELECT:
     return performVSelectCombine(N, DCI.DAG);
   case ISD::SETCC:
-    return performSETCCCombine(N, DAG);
+    return performSETCCCombine(N, DCI, DAG);
   case ISD::LOAD:
     if (performTBISimplification(N->getOperand(1), DCI, DAG))
       return SDValue(N, 0);

diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
index 7c5058263b675..ee892ae9cf518 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll
@@ -4,34 +4,11 @@
 define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) {
 ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    cmeq v0.8b, v0.8b, #0
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    umov w13, v0.b[5]
-; CHECK-NEXT:    umov w14, v0.b[6]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    and w8, w12, #0x1
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    and w10, w13, #0x1
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    umov w11, v0.b[7]
-; CHECK-NEXT:    bfi w9, w8, #4, #1
-; CHECK-NEXT:    and w8, w14, #0x1
-; CHECK-NEXT:    bfi w9, w10, #5, #1
-; CHECK-NEXT:    orr w8, w9, w8, lsl #6
-; CHECK-NEXT:    orr w8, w8, w11, lsl #7
-; CHECK-NEXT:    tst w8, #0xff
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    umaxv b0, v0.8b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    mvn w8, w8
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %cmp1 = icmp eq <8 x i8> %a, zeroinitializer
   %cast = bitcast <8 x i1> %cmp1 to i8
@@ -42,58 +19,11 @@ define i1 @combine_setcc_eq_vecreduce_or_v8i1(<8 x i8> %a) {
 define i1 @combine_setcc_eq_vecreduce_or_v16i1(<16 x i8> %a) {
 ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    cmeq v0.16b, v0.16b, #0
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    umov w13, v0.b[5]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    umov w8, v0.b[6]
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    umov w10, v0.b[7]
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    umov w11, v0.b[8]
-; CHECK-NEXT:    bfi w9, w12, #4, #1
-; CHECK-NEXT:    umov w12, v0.b[9]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    bfi w9, w13, #5, #1
-; CHECK-NEXT:    umov w13, v0.b[10]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w8, w9, w8, lsl #6
-; CHECK-NEXT:    umov w9, v0.b[11]
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w8, w8, w10, lsl #7
-; CHECK-NEXT:    umov w10, v0.b[12]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    orr w8, w8, w11, lsl #8
-; CHECK-NEXT:    umov w11, v0.b[13]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w8, w8, w12, lsl #9
-; CHECK-NEXT:    umov w12, v0.b[14]
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    orr w8, w8, w13, lsl #10
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w8, w8, w9, lsl #11
-; CHECK-NEXT:    and w9, w11, #0x1
-; CHECK-NEXT:    umov w11, v0.b[15]
-; CHECK-NEXT:    orr w8, w8, w10, lsl #12
-; CHECK-NEXT:    and w10, w12, #0x1
-; CHECK-NEXT:    orr w8, w8, w9, lsl #13
-; CHECK-NEXT:    orr w8, w8, w10, lsl #14
-; CHECK-NEXT:    orr w8, w8, w11, lsl #15
-; CHECK-NEXT:    tst w8, #0xffff
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    umaxv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    mvn w8, w8
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %cmp1 = icmp eq <16 x i8> %a, zeroinitializer
   %cast = bitcast <16 x i1> %cmp1 to i16
@@ -104,106 +34,13 @@ define i1 @combine_setcc_eq_vecreduce_or_v16i1(<16 x i8> %a) {
 define i1 @combine_setcc_eq_vecreduce_or_v32i1(<32 x i8> %a) {
 ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v32i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    cmeq v1.16b, v1.16b, #0
 ; CHECK-NEXT:    cmeq v0.16b, v0.16b, #0
-; CHECK-NEXT:    umov w8, v1.b[1]
-; CHECK-NEXT:    umov w10, v1.b[2]
-; CHECK-NEXT:    umov w9, v1.b[0]
-; CHECK-NEXT:    umov w11, v1.b[3]
-; CHECK-NEXT:    umov w12, v1.b[4]
-; CHECK-NEXT:    umov w13, v1.b[5]
-; CHECK-NEXT:    umov w14, v1.b[6]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    umov w15, v1.b[7]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    umov w8, v1.b[8]
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    umov w10, v1.b[9]
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    bfi w9, w12, #4, #1
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    umov w11, v1.b[10]
-; CHECK-NEXT:    bfi w9, w13, #5, #1
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    umov w13, v0.b[1]
-; CHECK-NEXT:    orr w9, w9, w14, lsl #6
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    orr w9, w9, w15, lsl #7
-; CHECK-NEXT:    umov w15, v0.b[2]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    umov w14, v0.b[0]
-; CHECK-NEXT:    orr w8, w9, w8, lsl #8
-; CHECK-NEXT:    umov w9, v0.b[3]
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w8, w8, w10, lsl #9
-; CHECK-NEXT:    umov w10, v0.b[4]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w8, w8, w11, lsl #10
-; CHECK-NEXT:    and w11, w15, #0x1
-; CHECK-NEXT:    umov w15, v0.b[5]
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    bfi w14, w13, #1, #1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    umov w13, v0.b[6]
-; CHECK-NEXT:    bfi w14, w11, #2, #1
-; CHECK-NEXT:    umov w11, v1.b[12]
-; CHECK-NEXT:    bfi w14, w9, #3, #1
-; CHECK-NEXT:    and w9, w15, #0x1
-; CHECK-NEXT:    umov w12, v1.b[11]
-; CHECK-NEXT:    bfi w14, w10, #4, #1
-; CHECK-NEXT:    umov w10, v0.b[7]
-; CHECK-NEXT:    umov w15, v0.b[8]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    bfi w14, w9, #5, #1
-; CHECK-NEXT:    and w9, w11, #0x1
-; CHECK-NEXT:    orr w11, w14, w13, lsl #6
-; CHECK-NEXT:    umov w13, v0.b[9]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w14, w15, #0x1
-; CHECK-NEXT:    umov w15, v0.b[10]
-; CHECK-NEXT:    orr w8, w8, w12, lsl #11
-; CHECK-NEXT:    orr w10, w11, w10, lsl #7
-; CHECK-NEXT:    orr w8, w8, w9, lsl #12
-; CHECK-NEXT:    orr w9, w10, w14, lsl #8
-; CHECK-NEXT:    and w10, w13, #0x1
-; CHECK-NEXT:    umov w11, v0.b[11]
-; CHECK-NEXT:    and w12, w15, #0x1
-; CHECK-NEXT:    umov w13, v0.b[12]
-; CHECK-NEXT:    orr w9, w9, w10, lsl #9
-; CHECK-NEXT:    umov w14, v1.b[13]
-; CHECK-NEXT:    orr w9, w9, w12, lsl #10
-; CHECK-NEXT:    umov w12, v0.b[13]
-; CHECK-NEXT:    umov w10, v1.b[14]
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    umov w15, v0.b[14]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w9, w9, w11, lsl #11
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    umov w11, v1.b[15]
-; CHECK-NEXT:    orr w9, w9, w13, lsl #12
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    umov w13, v0.b[15]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    orr w8, w8, w14, lsl #13
-; CHECK-NEXT:    orr w9, w9, w12, lsl #13
-; CHECK-NEXT:    orr w8, w8, w10, lsl #14
-; CHECK-NEXT:    orr w9, w9, w15, lsl #14
-; CHECK-NEXT:    orr w8, w8, w11, lsl #15
-; CHECK-NEXT:    orr w9, w9, w13, lsl #15
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    tst w8, #0xffff
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    umaxv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    mvn w8, w8
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %cmp1 = icmp eq <32 x i8> %a, zeroinitializer
   %cast = bitcast <32 x i1> %cmp1 to i32
@@ -214,202 +51,17 @@ define i1 @combine_setcc_eq_vecreduce_or_v32i1(<32 x i8> %a) {
 define i1 @combine_setcc_eq_vecreduce_or_v64i1(<64 x i8> %a) {
 ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v64i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    cmeq v2.16b, v2.16b, #0
 ; CHECK-NEXT:    cmeq v3.16b, v3.16b, #0
 ; CHECK-NEXT:    cmeq v1.16b, v1.16b, #0
-; CHECK-NEXT:    umov w8, v3.b[1]
-; CHECK-NEXT:    umov w10, v3.b[2]
-; CHECK-NEXT:    umov w9, v3.b[0]
-; CHECK-NEXT:    umov w11, v3.b[3]
-; CHECK-NEXT:    umov w12, v3.b[4]
-; CHECK-NEXT:    umov w13, v3.b[5]
-; CHECK-NEXT:    umov w14, v3.b[6]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    umov w15, v3.b[7]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    umov w16, v3.b[8]
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    umov w17, v3.b[9]
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    umov w8, v3.b[10]
-; CHECK-NEXT:    bfi w9, w12, #4, #1
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    umov w10, v3.b[11]
-; CHECK-NEXT:    bfi w9, w13, #5, #1
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    umov w11, v3.b[12]
-; CHECK-NEXT:    and w16, w16, #0x1
-; CHECK-NEXT:    orr w9, w9, w14, lsl #6
-; CHECK-NEXT:    umov w12, v3.b[13]
-; CHECK-NEXT:    and w14, w17, #0x1
-; CHECK-NEXT:    orr w9, w9, w15, lsl #7
-; CHECK-NEXT:    umov w13, v3.b[14]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    orr w9, w9, w16, lsl #8
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w9, w9, w14, lsl #9
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w8, w9, w8, lsl #10
-; CHECK-NEXT:    umov w9, v1.b[1]
-; CHECK-NEXT:    orr w8, w8, w10, lsl #11
-; CHECK-NEXT:    and w10, w12, #0x1
-; CHECK-NEXT:    orr w8, w8, w11, lsl #12
-; CHECK-NEXT:    and w11, w13, #0x1
-; CHECK-NEXT:    umov w12, v1.b[2]
-; CHECK-NEXT:    orr w8, w8, w10, lsl #13
-; CHECK-NEXT:    umov w10, v1.b[0]
-; CHECK-NEXT:    orr w8, w8, w11, lsl #14
-; CHECK-NEXT:    umov w11, v1.b[3]
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    umov w13, v1.b[4]
-; CHECK-NEXT:    umov w14, v1.b[5]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    umov w15, v3.b[15]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    bfi w10, w9, #1, #1
-; CHECK-NEXT:    and w9, w11, #0x1
-; CHECK-NEXT:    and w11, w13, #0x1
-; CHECK-NEXT:    bfi w10, w12, #2, #1
-; CHECK-NEXT:    umov w13, v1.b[6]
-; CHECK-NEXT:    and w12, w14, #0x1
-; CHECK-NEXT:    bfi w10, w9, #3, #1
-; CHECK-NEXT:    umov w9, v1.b[7]
-; CHECK-NEXT:    bfi w10, w11, #4, #1
-; CHECK-NEXT:    umov w11, v1.b[8]
-; CHECK-NEXT:    bfi w10, w12, #5, #1
-; CHECK-NEXT:    umov w14, v1.b[10]
-; CHECK-NEXT:    and w12, w13, #0x1
-; CHECK-NEXT:    umov w13, v1.b[9]
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    orr w8, w8, w15, lsl #15
-; CHECK-NEXT:    orr w10, w10, w12, lsl #6
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w9, w10, w9, lsl #7
-; CHECK-NEXT:    umov w10, v1.b[11]
-; CHECK-NEXT:    orr w9, w9, w11, lsl #8
-; CHECK-NEXT:    and w11, w13, #0x1
-; CHECK-NEXT:    umov w13, v1.b[12]
-; CHECK-NEXT:    and w12, w14, #0x1
-; CHECK-NEXT:    orr w9, w9, w11, lsl #9
-; CHECK-NEXT:    umov w11, v1.b[13]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w9, w9, w12, lsl #10
-; CHECK-NEXT:    cmeq v2.16b, v2.16b, #0
-; CHECK-NEXT:    orr w9, w9, w10, lsl #11
-; CHECK-NEXT:    and w10, w13, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    umov w12, v2.b[1]
-; CHECK-NEXT:    orr w9, w9, w10, lsl #12
-; CHECK-NEXT:    umov w10, v2.b[2]
-; CHECK-NEXT:    umov w14, v2.b[0]
-; CHECK-NEXT:    orr w9, w9, w11, lsl #13
-; CHECK-NEXT:    umov w11, v2.b[3]
-; CHECK-NEXT:    umov w15, v2.b[4]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    umov w16, v2.b[5]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    umov w13, v1.b[14]
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    bfi w14, w12, #1, #1
-; CHECK-NEXT:    and w12, w15, #0x1
-; CHECK-NEXT:    umov w15, v2.b[6]
-; CHECK-NEXT:    bfi w14, w10, #2, #1
-; CHECK-NEXT:    umov w10, v2.b[7]
-; CHECK-NEXT:    bfi w14, w11, #3, #1
-; CHECK-NEXT:    and w11, w16, #0x1
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    bfi w14, w12, #4, #1
-; CHECK-NEXT:    and w12, w15, #0x1
-; CHECK-NEXT:    umov w16, v2.b[11]
-; CHECK-NEXT:    bfi w14, w11, #5, #1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w9, w9, w13, lsl #14
-; CHECK-NEXT:    umov w13, v2.b[8]
-; CHECK-NEXT:    orr w12, w14, w12, lsl #6
-; CHECK-NEXT:    umov w11, v1.b[15]
-; CHECK-NEXT:    orr w10, w12, w10, lsl #7
-; CHECK-NEXT:    umov w12, v2.b[9]
-; CHECK-NEXT:    umov w14, v2.b[10]
-; CHECK-NEXT:    and w16, w16, #0x1
 ; CHECK-NEXT:    cmeq v0.16b, v0.16b, #0
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w9, w9, w11, lsl #15
-; CHECK-NEXT:    and w11, w12, #0x1
-; CHECK-NEXT:    umov w12, v0.b[1]
-; CHECK-NEXT:    orr w10, w10, w13, lsl #8
-; CHECK-NEXT:    and w13, w14, #0x1
-; CHECK-NEXT:    umov w14, v0.b[0]
-; CHECK-NEXT:    orr w10, w10, w11, lsl #9
-; CHECK-NEXT:    umov w11, v0.b[2]
-; CHECK-NEXT:    orr w10, w10, w13, lsl #10
-; CHECK-NEXT:    umov w13, v0.b[3]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    umov w15, v0.b[4]
-; CHECK-NEXT:    umov w17, v0.b[5]
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    orr w10, w10, w16, lsl #11
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    bfi w14, w12, #1, #1
-; CHECK-NEXT:    and w12, w13, #0x1
-; CHECK-NEXT:    umov w16, v2.b[13]
-; CHECK-NEXT:    and w13, w15, #0x1
-; CHECK-NEXT:    umov w15, v0.b[6]
-; CHECK-NEXT:    bfi w14, w11, #2, #1
-; CHECK-NEXT:    umov w11, v2.b[12]
-; CHECK-NEXT:    bfi w14, w12, #3, #1
-; CHECK-NEXT:    and w12, w17, #0x1
-; CHECK-NEXT:    bfi w14, w13, #4, #1
-; CHECK-NEXT:    umov w13, v0.b[7]
-; CHECK-NEXT:    umov w17, v0.b[8]
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    bfi w14, w12, #5, #1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w12, w14, w15, lsl #6
-; CHECK-NEXT:    umov w14, v0.b[9]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w10, w10, w11, lsl #12
-; CHECK-NEXT:    and w15, w17, #0x1
-; CHECK-NEXT:    umov w17, v0.b[10]
-; CHECK-NEXT:    orr w12, w12, w13, lsl #7
-; CHECK-NEXT:    umov w13, v0.b[11]
-; CHECK-NEXT:    orr w11, w12, w15, lsl #8
-; CHECK-NEXT:    and w12, w14, #0x1
-; CHECK-NEXT:    umov w15, v0.b[12]
-; CHECK-NEXT:    and w16, w16, #0x1
-; CHECK-NEXT:    and w14, w17, #0x1
-; CHECK-NEXT:    orr w11, w11, w12, lsl #9
-; CHECK-NEXT:    umov w12, v2.b[14]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w11, w11, w14, lsl #10
-; CHECK-NEXT:    umov w14, v0.b[13]
-; CHECK-NEXT:    umov w17, v0.b[14]
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    orr w11, w11, w13, lsl #11
-; CHECK-NEXT:    umov w13, v2.b[15]
-; CHECK-NEXT:    orr w11, w11, w15, lsl #12
-; CHECK-NEXT:    umov w15, v0.b[15]
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    and w17, w17, #0x1
-; CHECK-NEXT:    orr w10, w10, w16, lsl #13
-; CHECK-NEXT:    orr w11, w11, w14, lsl #13
-; CHECK-NEXT:    orr w10, w10, w12, lsl #14
-; CHECK-NEXT:    orr w11, w11, w17, lsl #14
-; CHECK-NEXT:    orr w10, w10, w13, lsl #15
-; CHECK-NEXT:    orr w11, w11, w15, lsl #15
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    orr w9, w11, w10
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    tst w8, #0xffff
-; CHECK-NEXT:    cset w0, eq
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    orr v1.16b, v1.16b, v3.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    umaxv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    mvn w8, w8
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %cmp1 = icmp eq <64 x i8> %a, zeroinitializer
   %cast = bitcast <64 x i1> %cmp1 to i64
@@ -420,34 +72,10 @@ define i1 @combine_setcc_eq_vecreduce_or_v64i1(<64 x i8> %a) {
 define i1 @combine_setcc_ne_vecreduce_or_v8i1(<8 x i8> %a) {
 ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v8i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    cmtst v0.8b, v0.8b, v0.8b
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    umov w13, v0.b[5]
-; CHECK-NEXT:    umov w14, v0.b[6]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    and w8, w12, #0x1
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    and w10, w13, #0x1
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    umov w11, v0.b[7]
-; CHECK-NEXT:    bfi w9, w8, #4, #1
-; CHECK-NEXT:    and w8, w14, #0x1
-; CHECK-NEXT:    bfi w9, w10, #5, #1
-; CHECK-NEXT:    orr w8, w9, w8, lsl #6
-; CHECK-NEXT:    orr w8, w8, w11, lsl #7
-; CHECK-NEXT:    tst w8, #0xff
-; CHECK-NEXT:    cset w0, ne
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    umaxv b0, v0.8b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %cmp1 = icmp ne <8 x i8> %a, zeroinitializer
   %cast = bitcast <8 x i1> %cmp1 to i8
@@ -458,58 +86,10 @@ define i1 @combine_setcc_ne_vecreduce_or_v8i1(<8 x i8> %a) {
 define i1 @combine_setcc_ne_vecreduce_or_v16i1(<16 x i8> %a) {
 ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v16i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
 ; CHECK-NEXT:    cmtst v0.16b, v0.16b, v0.16b
-; CHECK-NEXT:    umov w8, v0.b[1]
-; CHECK-NEXT:    umov w10, v0.b[2]
-; CHECK-NEXT:    umov w9, v0.b[0]
-; CHECK-NEXT:    umov w11, v0.b[3]
-; CHECK-NEXT:    umov w12, v0.b[4]
-; CHECK-NEXT:    umov w13, v0.b[5]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    umov w8, v0.b[6]
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    umov w10, v0.b[7]
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    umov w11, v0.b[8]
-; CHECK-NEXT:    bfi w9, w12, #4, #1
-; CHECK-NEXT:    umov w12, v0.b[9]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    bfi w9, w13, #5, #1
-; CHECK-NEXT:    umov w13, v0.b[10]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w8, w9, w8, lsl #6
-; CHECK-NEXT:    umov w9, v0.b[11]
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w8, w8, w10, lsl #7
-; CHECK-NEXT:    umov w10, v0.b[12]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    orr w8, w8, w11, lsl #8
-; CHECK-NEXT:    umov w11, v0.b[13]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w8, w8, w12, lsl #9
-; CHECK-NEXT:    umov w12, v0.b[14]
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    orr w8, w8, w13, lsl #10
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w8, w8, w9, lsl #11
-; CHECK-NEXT:    and w9, w11, #0x1
-; CHECK-NEXT:    umov w11, v0.b[15]
-; CHECK-NEXT:    orr w8, w8, w10, lsl #12
-; CHECK-NEXT:    and w10, w12, #0x1
-; CHECK-NEXT:    orr w8, w8, w9, lsl #13
-; CHECK-NEXT:    orr w8, w8, w10, lsl #14
-; CHECK-NEXT:    orr w8, w8, w11, lsl #15
-; CHECK-NEXT:    tst w8, #0xffff
-; CHECK-NEXT:    cset w0, ne
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    umaxv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %cmp1 = icmp ne <16 x i8> %a, zeroinitializer
   %cast = bitcast <16 x i1> %cmp1 to i16
@@ -520,106 +100,11 @@ define i1 @combine_setcc_ne_vecreduce_or_v16i1(<16 x i8> %a) {
 define i1 @combine_setcc_ne_vecreduce_or_v32i1(<32 x i8> %a) {
 ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v32i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    cmtst v1.16b, v1.16b, v1.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmtst v0.16b, v0.16b, v0.16b
-; CHECK-NEXT:    umov w8, v1.b[1]
-; CHECK-NEXT:    umov w10, v1.b[2]
-; CHECK-NEXT:    umov w9, v1.b[0]
-; CHECK-NEXT:    umov w11, v1.b[3]
-; CHECK-NEXT:    umov w12, v1.b[4]
-; CHECK-NEXT:    umov w13, v1.b[5]
-; CHECK-NEXT:    umov w14, v1.b[6]
-; CHECK-NEXT:    umov w15, v1.b[7]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    umov w8, v1.b[8]
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    umov w10, v1.b[9]
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    bfi w9, w12, #4, #1
-; CHECK-NEXT:    umov w11, v1.b[10]
-; CHECK-NEXT:    bfi w9, w13, #5, #1
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    umov w13, v0.b[1]
-; CHECK-NEXT:    orr w9, w9, w14, lsl #6
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    orr w9, w9, w15, lsl #7
-; CHECK-NEXT:    umov w15, v0.b[2]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    umov w14, v0.b[0]
-; CHECK-NEXT:    orr w8, w9, w8, lsl #8
-; CHECK-NEXT:    umov w9, v0.b[3]
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w8, w8, w10, lsl #9
-; CHECK-NEXT:    umov w10, v0.b[4]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w8, w8, w11, lsl #10
-; CHECK-NEXT:    umov w11, v0.b[5]
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    bfi w14, w13, #1, #1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    umov w13, v0.b[6]
-; CHECK-NEXT:    bfi w14, w15, #2, #1
-; CHECK-NEXT:    bfi w14, w9, #3, #1
-; CHECK-NEXT:    and w9, w11, #0x1
-; CHECK-NEXT:    umov w12, v1.b[11]
-; CHECK-NEXT:    umov w15, v1.b[12]
-; CHECK-NEXT:    bfi w14, w10, #4, #1
-; CHECK-NEXT:    umov w10, v0.b[7]
-; CHECK-NEXT:    and w11, w13, #0x1
-; CHECK-NEXT:    umov w13, v0.b[8]
-; CHECK-NEXT:    bfi w14, w9, #5, #1
-; CHECK-NEXT:    orr w11, w14, w11, lsl #6
-; CHECK-NEXT:    umov w14, v0.b[9]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    and w9, w15, #0x1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    umov w15, v0.b[10]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w8, w8, w12, lsl #11
-; CHECK-NEXT:    orr w10, w11, w10, lsl #7
-; CHECK-NEXT:    orr w8, w8, w9, lsl #12
-; CHECK-NEXT:    orr w9, w10, w13, lsl #8
-; CHECK-NEXT:    and w10, w14, #0x1
-; CHECK-NEXT:    umov w11, v0.b[11]
-; CHECK-NEXT:    and w12, w15, #0x1
-; CHECK-NEXT:    umov w13, v0.b[12]
-; CHECK-NEXT:    orr w9, w9, w10, lsl #9
-; CHECK-NEXT:    umov w14, v1.b[13]
-; CHECK-NEXT:    orr w9, w9, w12, lsl #10
-; CHECK-NEXT:    umov w12, v0.b[13]
-; CHECK-NEXT:    umov w10, v1.b[14]
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    umov w15, v0.b[14]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    orr w9, w9, w11, lsl #11
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    umov w11, v1.b[15]
-; CHECK-NEXT:    orr w9, w9, w13, lsl #12
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    umov w13, v0.b[15]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    orr w8, w8, w14, lsl #13
-; CHECK-NEXT:    orr w9, w9, w12, lsl #13
-; CHECK-NEXT:    orr w8, w8, w10, lsl #14
-; CHECK-NEXT:    orr w9, w9, w15, lsl #14
-; CHECK-NEXT:    orr w8, w8, w11, lsl #15
-; CHECK-NEXT:    orr w9, w9, w13, lsl #15
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    tst w8, #0xffff
-; CHECK-NEXT:    cset w0, ne
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    umaxv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %cmp1 = icmp ne <32 x i8> %a, zeroinitializer
   %cast = bitcast <32 x i1> %cmp1 to i32
@@ -630,202 +115,13 @@ define i1 @combine_setcc_ne_vecreduce_or_v32i1(<32 x i8> %a) {
 define i1 @combine_setcc_ne_vecreduce_or_v64i1(<64 x i8> %a) {
 ; CHECK-LABEL: combine_setcc_ne_vecreduce_or_v64i1:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    sub sp, sp, #16
-; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    cmtst v3.16b, v3.16b, v3.16b
-; CHECK-NEXT:    cmtst v1.16b, v1.16b, v1.16b
-; CHECK-NEXT:    cmtst v2.16b, v2.16b, v2.16b
-; CHECK-NEXT:    umov w8, v3.b[1]
-; CHECK-NEXT:    umov w10, v3.b[2]
-; CHECK-NEXT:    umov w9, v3.b[0]
-; CHECK-NEXT:    umov w11, v3.b[3]
-; CHECK-NEXT:    umov w12, v3.b[4]
-; CHECK-NEXT:    umov w13, v3.b[5]
-; CHECK-NEXT:    umov w14, v3.b[6]
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    umov w15, v3.b[7]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    bfi w9, w8, #1, #1
-; CHECK-NEXT:    umov w16, v3.b[8]
-; CHECK-NEXT:    bfi w9, w10, #2, #1
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    umov w17, v3.b[9]
-; CHECK-NEXT:    bfi w9, w11, #3, #1
-; CHECK-NEXT:    umov w8, v3.b[10]
-; CHECK-NEXT:    bfi w9, w12, #4, #1
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    umov w10, v3.b[11]
-; CHECK-NEXT:    bfi w9, w13, #5, #1
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    umov w11, v3.b[12]
-; CHECK-NEXT:    orr w9, w9, w14, lsl #6
-; CHECK-NEXT:    and w16, w16, #0x1
-; CHECK-NEXT:    umov w12, v3.b[13]
-; CHECK-NEXT:    orr w9, w9, w15, lsl #7
-; CHECK-NEXT:    and w17, w17, #0x1
-; CHECK-NEXT:    and w8, w8, #0x1
-; CHECK-NEXT:    orr w9, w9, w16, lsl #8
-; CHECK-NEXT:    umov w14, v1.b[1]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w9, w9, w17, lsl #9
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w8, w9, w8, lsl #10
-; CHECK-NEXT:    umov w15, v1.b[0]
-; CHECK-NEXT:    and w9, w12, #0x1
-; CHECK-NEXT:    orr w8, w8, w10, lsl #11
-; CHECK-NEXT:    umov w10, v1.b[2]
-; CHECK-NEXT:    orr w8, w8, w11, lsl #12
-; CHECK-NEXT:    umov w11, v1.b[3]
-; CHECK-NEXT:    orr w8, w8, w9, lsl #13
-; CHECK-NEXT:    and w9, w14, #0x1
-; CHECK-NEXT:    umov w12, v1.b[4]
-; CHECK-NEXT:    and w14, w15, #0x1
-; CHECK-NEXT:    umov w15, v1.b[5]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    umov w13, v3.b[14]
-; CHECK-NEXT:    bfi w14, w9, #1, #1
-; CHECK-NEXT:    and w9, w11, #0x1
-; CHECK-NEXT:    and w11, w12, #0x1
-; CHECK-NEXT:    umov w12, v1.b[6]
-; CHECK-NEXT:    bfi w14, w10, #2, #1
-; CHECK-NEXT:    umov w10, v1.b[7]
-; CHECK-NEXT:    bfi w14, w9, #3, #1
-; CHECK-NEXT:    and w9, w15, #0x1
-; CHECK-NEXT:    bfi w14, w11, #4, #1
-; CHECK-NEXT:    umov w15, v1.b[11]
-; CHECK-NEXT:    and w11, w12, #0x1
-; CHECK-NEXT:    umov w12, v1.b[8]
-; CHECK-NEXT:    bfi w14, w9, #5, #1
-; CHECK-NEXT:    umov w9, v1.b[9]
-; CHECK-NEXT:    orr w11, w14, w11, lsl #6
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    umov w14, v1.b[10]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    orr w10, w11, w10, lsl #7
-; CHECK-NEXT:    and w9, w9, #0x1
-; CHECK-NEXT:    umov w11, v1.b[12]
-; CHECK-NEXT:    orr w10, w10, w12, lsl #8
-; CHECK-NEXT:    and w12, w15, #0x1
-; CHECK-NEXT:    orr w9, w10, w9, lsl #9
-; CHECK-NEXT:    and w10, w14, #0x1
-; CHECK-NEXT:    orr w8, w8, w13, lsl #14
-; CHECK-NEXT:    umov w13, v3.b[15]
-; CHECK-NEXT:    umov w14, v1.b[13]
-; CHECK-NEXT:    orr w9, w9, w10, lsl #10
-; CHECK-NEXT:    and w10, w11, #0x1
-; CHECK-NEXT:    orr w9, w9, w12, lsl #11
-; CHECK-NEXT:    umov w12, v2.b[2]
-; CHECK-NEXT:    umov w15, v2.b[4]
-; CHECK-NEXT:    orr w9, w9, w10, lsl #12
-; CHECK-NEXT:    umov w10, v2.b[1]
-; CHECK-NEXT:    orr w8, w8, w13, lsl #15
-; CHECK-NEXT:    and w11, w14, #0x1
-; CHECK-NEXT:    umov w13, v2.b[0]
-; CHECK-NEXT:    umov w14, v2.b[3]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    umov w16, v2.b[5]
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    orr w9, w9, w11, lsl #13
-; CHECK-NEXT:    umov w11, v1.b[14]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    bfi w13, w10, #1, #1
-; CHECK-NEXT:    umov w10, v2.b[6]
-; CHECK-NEXT:    bfi w13, w12, #2, #1
-; CHECK-NEXT:    and w12, w15, #0x1
-; CHECK-NEXT:    bfi w13, w14, #3, #1
-; CHECK-NEXT:    and w14, w16, #0x1
-; CHECK-NEXT:    umov w15, v2.b[7]
-; CHECK-NEXT:    umov w16, v2.b[8]
-; CHECK-NEXT:    bfi w13, w12, #4, #1
-; CHECK-NEXT:    and w10, w10, #0x1
-; CHECK-NEXT:    bfi w13, w14, #5, #1
-; CHECK-NEXT:    umov w12, v1.b[15]
-; CHECK-NEXT:    orr w10, w13, w10, lsl #6
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    and w13, w15, #0x1
-; CHECK-NEXT:    umov w15, v2.b[9]
-; CHECK-NEXT:    and w14, w16, #0x1
-; CHECK-NEXT:    umov w16, v2.b[10]
+; CHECK-NEXT:    orr v1.16b, v1.16b, v3.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v2.16b
+; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT:    cmtst v0.16b, v0.16b, v0.16b
-; CHECK-NEXT:    orr w9, w9, w11, lsl #14
-; CHECK-NEXT:    orr w10, w10, w13, lsl #7
-; CHECK-NEXT:    orr w9, w9, w12, lsl #15
-; CHECK-NEXT:    and w11, w15, #0x1
-; CHECK-NEXT:    orr w10, w10, w14, lsl #8
-; CHECK-NEXT:    umov w12, v0.b[1]
-; CHECK-NEXT:    and w13, w16, #0x1
-; CHECK-NEXT:    umov w14, v0.b[0]
-; CHECK-NEXT:    orr w10, w10, w11, lsl #9
-; CHECK-NEXT:    umov w11, v0.b[2]
-; CHECK-NEXT:    orr w10, w10, w13, lsl #10
-; CHECK-NEXT:    umov w13, v0.b[3]
-; CHECK-NEXT:    umov w15, v0.b[4]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    umov w17, v0.b[5]
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    umov w16, v2.b[11]
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    bfi w14, w12, #1, #1
-; CHECK-NEXT:    and w12, w13, #0x1
-; CHECK-NEXT:    and w13, w15, #0x1
-; CHECK-NEXT:    umov w15, v0.b[6]
-; CHECK-NEXT:    bfi w14, w11, #2, #1
-; CHECK-NEXT:    umov w11, v2.b[12]
-; CHECK-NEXT:    bfi w14, w12, #3, #1
-; CHECK-NEXT:    and w12, w17, #0x1
-; CHECK-NEXT:    bfi w14, w13, #4, #1
-; CHECK-NEXT:    umov w13, v0.b[7]
-; CHECK-NEXT:    umov w17, v0.b[8]
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    bfi w14, w12, #5, #1
-; CHECK-NEXT:    and w16, w16, #0x1
-; CHECK-NEXT:    orr w12, w14, w15, lsl #6
-; CHECK-NEXT:    umov w14, v0.b[9]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    and w11, w11, #0x1
-; CHECK-NEXT:    and w15, w17, #0x1
-; CHECK-NEXT:    umov w17, v0.b[10]
-; CHECK-NEXT:    orr w10, w10, w16, lsl #11
-; CHECK-NEXT:    orr w12, w12, w13, lsl #7
-; CHECK-NEXT:    orr w10, w10, w11, lsl #12
-; CHECK-NEXT:    orr w11, w12, w15, lsl #8
-; CHECK-NEXT:    and w12, w14, #0x1
-; CHECK-NEXT:    umov w13, v0.b[11]
-; CHECK-NEXT:    and w14, w17, #0x1
-; CHECK-NEXT:    umov w15, v0.b[12]
-; CHECK-NEXT:    orr w11, w11, w12, lsl #9
-; CHECK-NEXT:    umov w16, v2.b[13]
-; CHECK-NEXT:    orr w11, w11, w14, lsl #10
-; CHECK-NEXT:    umov w14, v0.b[13]
-; CHECK-NEXT:    umov w12, v2.b[14]
-; CHECK-NEXT:    and w13, w13, #0x1
-; CHECK-NEXT:    umov w17, v0.b[14]
-; CHECK-NEXT:    and w15, w15, #0x1
-; CHECK-NEXT:    orr w11, w11, w13, lsl #11
-; CHECK-NEXT:    and w16, w16, #0x1
-; CHECK-NEXT:    umov w13, v2.b[15]
-; CHECK-NEXT:    orr w11, w11, w15, lsl #12
-; CHECK-NEXT:    and w14, w14, #0x1
-; CHECK-NEXT:    umov w15, v0.b[15]
-; CHECK-NEXT:    and w12, w12, #0x1
-; CHECK-NEXT:    and w17, w17, #0x1
-; CHECK-NEXT:    orr w10, w10, w16, lsl #13
-; CHECK-NEXT:    orr w11, w11, w14, lsl #13
-; CHECK-NEXT:    orr w10, w10, w12, lsl #14
-; CHECK-NEXT:    orr w11, w11, w17, lsl #14
-; CHECK-NEXT:    orr w10, w10, w13, lsl #15
-; CHECK-NEXT:    orr w11, w11, w15, lsl #15
-; CHECK-NEXT:    orr w9, w11, w10
-; CHECK-NEXT:    orr w8, w9, w8
-; CHECK-NEXT:    tst w8, #0xffff
-; CHECK-NEXT:    cset w0, ne
-; CHECK-NEXT:    add sp, sp, #16
+; CHECK-NEXT:    umaxv b0, v0.16b
+; CHECK-NEXT:    fmov w8, s0
+; CHECK-NEXT:    and w0, w8, #0x1
 ; CHECK-NEXT:    ret
   %cmp1 = icmp ne <64 x i8> %a, zeroinitializer
   %cast = bitcast <64 x i1> %cmp1 to i64


        


More information about the llvm-commits mailing list