[llvm] r365010 - [Codegen][X86][AArch64][ARM][PowerPC] Inc-of-add vs sub-of-not (PR42457)

Roman Lebedev via llvm-commits llvm-commits at lists.llvm.org
Wed Jul 3 02:41:36 PDT 2019


Author: lebedevri
Date: Wed Jul  3 02:41:35 2019
New Revision: 365010

URL: http://llvm.org/viewvc/llvm-project?rev=365010&view=rev
Log:
[Codegen][X86][AArch64][ARM][PowerPC] Inc-of-add vs sub-of-not (PR42457)

Summary:
This is the backend part of [[ https://bugs.llvm.org/show_bug.cgi?id=42457 | PR42457 ]].
In the middle-end, we'd want to prefer the form with two adds (D63992),
but as this diff shows, not every target prefers that pattern.

Of the 4 targets for which I added tests, all seem to be OK with inc-of-add for scalars,
but only X86 prefers that same pattern for vectors.

Here I'm adding a new TLI hook that defaults to preferring inc-of-add,
and adding AArch64, ARM, and PowerPC overrides that prefer inc-of-add only for scalars.

Reviewers: spatel, RKSimon, efriedma, t.p.northover, hfinkel

Reviewed By: efriedma

Subscribers: nemanjai, javed.absar, kristof.beyls, kbarton, jsji, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D64090

Modified:
    llvm/trunk/include/llvm/CodeGen/TargetLowering.h
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
    llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
    llvm/trunk/lib/Target/ARM/ARMISelLowering.h
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
    llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
    llvm/trunk/test/CodeGen/AArch64/inc-of-add.ll
    llvm/trunk/test/CodeGen/AArch64/sub-of-not.ll
    llvm/trunk/test/CodeGen/ARM/inc-of-add.ll
    llvm/trunk/test/CodeGen/ARM/sub-of-not.ll
    llvm/trunk/test/CodeGen/PowerPC/inc-of-add.ll
    llvm/trunk/test/CodeGen/PowerPC/sub-of-not.ll
    llvm/trunk/test/CodeGen/X86/sub-of-not.ll

Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Wed Jul  3 02:41:35 2019
@@ -566,6 +566,16 @@ public:
     return false;
   }
 
+  /// These two forms are equivalent:
+  ///   sub %y, (xor %x, -1)
+  ///   add (add %x, 1), %y
+  /// The variant with two add's is IR-canonical.
+  /// Some targets may prefer one to the other.
+  virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
+    // By default, let's assume that everyone prefers the form with two add's.
+    return true;
+  }
+
   /// Return true if the target wants to use the optimization that
   /// turns ext(promotableInst1(...(promotableInstN(load)))) into
   /// promotedInst1(...(promotedInstN(ext(load)))).

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed Jul  3 02:41:35 2019
@@ -2424,6 +2424,17 @@ SDValue DAGCombiner::visitADDLike(SDNode
       if (Xor)
         return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
     }
+
+    // Look for:
+    //   add (add x, y), 1
+    // And if the target does not like this form then turn into:
+    //   sub y, (xor x, -1)
+    if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+        N0.getOpcode() == ISD::ADD) {
+      SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+                                DAG.getAllOnesConstant(DL, VT));
+      return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
+    }
   }
 
   // (x - y) + -1  ->  add (xor y, -1), x
@@ -2584,6 +2595,17 @@ SDValue DAGCombiner::visitADDLikeCommuta
   if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
     return V;
 
+  // Look for:
+  //   add (add x, 1), y
+  // And if the target does not like this form then turn into:
+  //   sub y, (xor x, -1)
+  if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+      N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
+    SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+                              DAG.getAllOnesConstant(DL, VT));
+    return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
+  }
+
   // Hoist one-use subtraction by non-opaque constant:
   //   (x - C) + y  ->  (x + y) - C
   // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
@@ -3108,6 +3130,15 @@ SDValue DAGCombiner::visitSUB(SDNode *N)
     return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
   }
 
+  // Look for:
+  //   sub y, (xor x, -1)
+  // And if the target does not like this form then turn into:
+  //   add (add x, y), 1
+  if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
+    SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
+    return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
+  }
+
   // Hoist one-use addition by non-opaque constant:
   //   (x + C) - y  ->  (x - y) + C
   if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Wed Jul  3 02:41:35 2019
@@ -12039,6 +12039,11 @@ bool AArch64TargetLowering::isIntDivChea
   return OptSize && !VT.isVector();
 }
 
+bool AArch64TargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  // We want inc-of-add for scalars and sub-of-not for vectors.
+  return VT.isScalarInteger();
+}
+
 bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
   return Subtarget->hasAggressiveFMA() && VT.isFloatingPoint();
 }

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h Wed Jul  3 02:41:35 2019
@@ -497,6 +497,8 @@ public:
     return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
   }
 
+  bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
   bool hasBitPreservingFPLogic(EVT VT) const override {
     // FIXME: Is this always true? It should be true for vectors at least.
     return VT == MVT::f32 || VT == MVT::f64;

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp Wed Jul  3 02:41:35 2019
@@ -10736,6 +10736,15 @@ bool ARMTargetLowering::shouldFoldConsta
   return false;
 }
 
+bool ARMTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  if (!Subtarget->hasNEON()) {
+    if (Subtarget->isThumb1Only())
+      return VT.getScalarSizeInBits() <= 32;
+    return true;
+  }
+  return VT.isScalarInteger();
+}
+
 static SDValue PerformSHLSimplify(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const ARMSubtarget *ST) {

Modified: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelLowering.h?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h Wed Jul  3 02:41:35 2019
@@ -608,6 +608,9 @@ class VectorType;
 
     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                            CombineLevel Level) const override;
+
+    bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
   protected:
     std::pair<const TargetRegisterClass *, uint8_t>
     findRepresentativeClass(const TargetRegisterInfo *TRI,

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.cpp Wed Jul  3 02:41:35 2019
@@ -1279,6 +1279,10 @@ bool PPCTargetLowering::hasSPE() const {
   return Subtarget.hasSPE();
 }
 
+bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const {
+  return VT.isScalarInteger();
+}
+
 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
   switch ((PPCISD::NodeType)Opcode) {
   case PPCISD::FIRST_NUMBER:    break;

Modified: llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h (original)
+++ llvm/trunk/lib/Target/PowerPC/PPCISelLowering.h Wed Jul  3 02:41:35 2019
@@ -634,6 +634,8 @@ namespace llvm {
       return true;
     }
 
+    bool preferIncOfAddToSubOfNot(EVT VT) const override;
+
     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
       return VT.isScalarInteger();
     }

Modified: llvm/trunk/test/CodeGen/AArch64/inc-of-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/inc-of-add.ll?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/inc-of-add.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/inc-of-add.ll Wed Jul  3 02:41:35 2019
@@ -53,9 +53,8 @@ define i64 @scalar_i64(i64 %x, i64 %y) n
 define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; CHECK-LABEL: vector_i128_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
-; CHECK-NEXT:    movi v1.16b, #1
-; CHECK-NEXT:    add v0.16b, v0.16b, v1.16b
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    sub v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT:    ret
   %t0 = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %t1 = add <16 x i8> %y, %t0
@@ -65,9 +64,8 @@ define <16 x i8> @vector_i128_i8(<16 x i
 define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; CHECK-LABEL: vector_i128_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
-; CHECK-NEXT:    movi v1.8h, #1
-; CHECK-NEXT:    add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT:    ret
   %t0 = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %t1 = add <8 x i16> %y, %t0
@@ -77,9 +75,8 @@ define <8 x i16> @vector_i128_i16(<8 x i
 define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; CHECK-LABEL: vector_i128_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT:    movi v1.4s, #1
-; CHECK-NEXT:    add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT:    ret
   %t0 = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %t1 = add <4 x i32> %y, %t0
@@ -89,10 +86,8 @@ define <4 x i32> @vector_i128_i32(<4 x i
 define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; CHECK-LABEL: vector_i128_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mov w8, #1
-; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
-; CHECK-NEXT:    dup v1.2d, x8
-; CHECK-NEXT:    add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT:    mvn v0.16b, v0.16b
+; CHECK-NEXT:    sub v0.2d, v1.2d, v0.2d
 ; CHECK-NEXT:    ret
   %t0 = add <2 x i64> %x, <i64 1, i64 1>
   %t1 = add <2 x i64> %y, %t0

Modified: llvm/trunk/test/CodeGen/AArch64/sub-of-not.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/sub-of-not.ll?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/sub-of-not.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/sub-of-not.ll Wed Jul  3 02:41:35 2019
@@ -9,8 +9,8 @@
 define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn w8, w0
-; CHECK-NEXT:    sub w0, w1, w8
+; CHECK-NEXT:    add w8, w1, w0
+; CHECK-NEXT:    add w0, w8, #1 // =1
 ; CHECK-NEXT:    ret
   %t0 = xor i8 %x, -1
   %t1 = sub i8 %y, %t0
@@ -20,8 +20,8 @@ define i8 @scalar_i8(i8 %x, i8 %y) nounw
 define i16 @scalar_i16(i16 %x, i16 %y) nounwind {
 ; CHECK-LABEL: scalar_i16:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn w8, w0
-; CHECK-NEXT:    sub w0, w1, w8
+; CHECK-NEXT:    add w8, w1, w0
+; CHECK-NEXT:    add w0, w8, #1 // =1
 ; CHECK-NEXT:    ret
   %t0 = xor i16 %x, -1
   %t1 = sub i16 %y, %t0
@@ -31,8 +31,8 @@ define i16 @scalar_i16(i16 %x, i16 %y) n
 define i32 @scalar_i32(i32 %x, i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn w8, w0
-; CHECK-NEXT:    sub w0, w1, w8
+; CHECK-NEXT:    add w8, w1, w0
+; CHECK-NEXT:    add w0, w8, #1 // =1
 ; CHECK-NEXT:    ret
   %t0 = xor i32 %x, -1
   %t1 = sub i32 %y, %t0
@@ -42,8 +42,8 @@ define i32 @scalar_i32(i32 %x, i32 %y) n
 define i64 @scalar_i64(i64 %x, i64 %y) nounwind {
 ; CHECK-LABEL: scalar_i64:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    mvn x8, x0
-; CHECK-NEXT:    sub x0, x1, x8
+; CHECK-NEXT:    add x8, x1, x0
+; CHECK-NEXT:    add x0, x8, #1 // =1
 ; CHECK-NEXT:    ret
   %t0 = xor i64 %x, -1
   %t1 = sub i64 %y, %t0

Modified: llvm/trunk/test/CodeGen/ARM/inc-of-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/inc-of-add.ll?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/inc-of-add.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/inc-of-add.ll Wed Jul  3 02:41:35 2019
@@ -91,11 +91,11 @@ define i64 @scalar_i64(i64 %x, i64 %y) n
 ;
 ; THUMB6-LABEL: scalar_i64:
 ; THUMB6:       @ %bb.0:
-; THUMB6-NEXT:    adds r0, r0, r2
-; THUMB6-NEXT:    adcs r1, r3
-; THUMB6-NEXT:    movs r2, #0
-; THUMB6-NEXT:    adds r0, r0, #1
-; THUMB6-NEXT:    adcs r1, r2
+; THUMB6-NEXT:    mvns r1, r1
+; THUMB6-NEXT:    mvns r0, r0
+; THUMB6-NEXT:    subs r0, r2, r0
+; THUMB6-NEXT:    sbcs r3, r1
+; THUMB6-NEXT:    mov r1, r3
 ; THUMB6-NEXT:    bx lr
 ;
 ; THUMB78-LABEL: scalar_i64:
@@ -196,12 +196,11 @@ define <16 x i8> @vector_i128_i8(<16 x i
 ; ARM78-LABEL: vector_i128_i8:
 ; ARM78:       @ %bb.0:
 ; ARM78-NEXT:    vmov d17, r2, r3
-; ARM78-NEXT:    mov r12, sp
 ; ARM78-NEXT:    vmov d16, r0, r1
-; ARM78-NEXT:    vld1.64 {d18, d19}, [r12]
-; ARM78-NEXT:    vmov.i8 q10, #0x1
-; ARM78-NEXT:    vadd.i8 q8, q8, q9
-; ARM78-NEXT:    vadd.i8 q8, q8, q10
+; ARM78-NEXT:    mov r0, sp
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vld1.64 {d18, d19}, [r0]
+; ARM78-NEXT:    vsub.i8 q8, q9, q8
 ; ARM78-NEXT:    vmov r0, r1, d16
 ; ARM78-NEXT:    vmov r2, r3, d17
 ; ARM78-NEXT:    bx lr
@@ -292,12 +291,11 @@ define <16 x i8> @vector_i128_i8(<16 x i
 ; THUMB78-LABEL: vector_i128_i8:
 ; THUMB78:       @ %bb.0:
 ; THUMB78-NEXT:    vmov d17, r2, r3
-; THUMB78-NEXT:    mov r12, sp
 ; THUMB78-NEXT:    vmov d16, r0, r1
-; THUMB78-NEXT:    vld1.64 {d18, d19}, [r12]
-; THUMB78-NEXT:    vmov.i8 q10, #0x1
-; THUMB78-NEXT:    vadd.i8 q8, q8, q9
-; THUMB78-NEXT:    vadd.i8 q8, q8, q10
+; THUMB78-NEXT:    mov r0, sp
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vld1.64 {d18, d19}, [r0]
+; THUMB78-NEXT:    vsub.i8 q8, q9, q8
 ; THUMB78-NEXT:    vmov r0, r1, d16
 ; THUMB78-NEXT:    vmov r2, r3, d17
 ; THUMB78-NEXT:    bx lr
@@ -352,12 +350,11 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ; ARM78-LABEL: vector_i128_i16:
 ; ARM78:       @ %bb.0:
 ; ARM78-NEXT:    vmov d17, r2, r3
-; ARM78-NEXT:    mov r12, sp
 ; ARM78-NEXT:    vmov d16, r0, r1
-; ARM78-NEXT:    vld1.64 {d18, d19}, [r12]
-; ARM78-NEXT:    vmov.i16 q10, #0x1
-; ARM78-NEXT:    vadd.i16 q8, q8, q9
-; ARM78-NEXT:    vadd.i16 q8, q8, q10
+; ARM78-NEXT:    mov r0, sp
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vld1.64 {d18, d19}, [r0]
+; ARM78-NEXT:    vsub.i16 q8, q9, q8
 ; ARM78-NEXT:    vmov r0, r1, d16
 ; ARM78-NEXT:    vmov r2, r3, d17
 ; ARM78-NEXT:    bx lr
@@ -408,12 +405,11 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ; THUMB78-LABEL: vector_i128_i16:
 ; THUMB78:       @ %bb.0:
 ; THUMB78-NEXT:    vmov d17, r2, r3
-; THUMB78-NEXT:    mov r12, sp
 ; THUMB78-NEXT:    vmov d16, r0, r1
-; THUMB78-NEXT:    vld1.64 {d18, d19}, [r12]
-; THUMB78-NEXT:    vmov.i16 q10, #0x1
-; THUMB78-NEXT:    vadd.i16 q8, q8, q9
-; THUMB78-NEXT:    vadd.i16 q8, q8, q10
+; THUMB78-NEXT:    mov r0, sp
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vld1.64 {d18, d19}, [r0]
+; THUMB78-NEXT:    vsub.i16 q8, q9, q8
 ; THUMB78-NEXT:    vmov r0, r1, d16
 ; THUMB78-NEXT:    vmov r2, r3, d17
 ; THUMB78-NEXT:    bx lr
@@ -442,12 +438,11 @@ define <4 x i32> @vector_i128_i32(<4 x i
 ; ARM78-LABEL: vector_i128_i32:
 ; ARM78:       @ %bb.0:
 ; ARM78-NEXT:    vmov d17, r2, r3
-; ARM78-NEXT:    mov r12, sp
 ; ARM78-NEXT:    vmov d16, r0, r1
-; ARM78-NEXT:    vld1.64 {d18, d19}, [r12]
-; ARM78-NEXT:    vmov.i32 q10, #0x1
-; ARM78-NEXT:    vadd.i32 q8, q8, q9
-; ARM78-NEXT:    vadd.i32 q8, q8, q10
+; ARM78-NEXT:    mov r0, sp
+; ARM78-NEXT:    vmvn q8, q8
+; ARM78-NEXT:    vld1.64 {d18, d19}, [r0]
+; ARM78-NEXT:    vsub.i32 q8, q9, q8
 ; ARM78-NEXT:    vmov r0, r1, d16
 ; ARM78-NEXT:    vmov r2, r3, d17
 ; ARM78-NEXT:    bx lr
@@ -472,12 +467,11 @@ define <4 x i32> @vector_i128_i32(<4 x i
 ; THUMB78-LABEL: vector_i128_i32:
 ; THUMB78:       @ %bb.0:
 ; THUMB78-NEXT:    vmov d17, r2, r3
-; THUMB78-NEXT:    mov r12, sp
 ; THUMB78-NEXT:    vmov d16, r0, r1
-; THUMB78-NEXT:    vld1.64 {d18, d19}, [r12]
-; THUMB78-NEXT:    vmov.i32 q10, #0x1
-; THUMB78-NEXT:    vadd.i32 q8, q8, q9
-; THUMB78-NEXT:    vadd.i32 q8, q8, q10
+; THUMB78-NEXT:    mov r0, sp
+; THUMB78-NEXT:    vmvn q8, q8
+; THUMB78-NEXT:    vld1.64 {d18, d19}, [r0]
+; THUMB78-NEXT:    vsub.i32 q8, q9, q8
 ; THUMB78-NEXT:    vmov r0, r1, d16
 ; THUMB78-NEXT:    vmov r2, r3, d17
 ; THUMB78-NEXT:    bx lr
@@ -509,60 +503,41 @@ define <2 x i64> @vector_i128_i64(<2 x i
 ; ARM78-NEXT:    vmov d17, r2, r3
 ; ARM78-NEXT:    vmov d16, r0, r1
 ; ARM78-NEXT:    mov r0, sp
+; ARM78-NEXT:    vmvn q8, q8
 ; ARM78-NEXT:    vld1.64 {d18, d19}, [r0]
-; ARM78-NEXT:    adr r0, .LCPI7_0
-; ARM78-NEXT:    vadd.i64 q8, q8, q9
-; ARM78-NEXT:    vld1.64 {d18, d19}, [r0:128]
-; ARM78-NEXT:    vadd.i64 q8, q8, q9
+; ARM78-NEXT:    vsub.i64 q8, q9, q8
 ; ARM78-NEXT:    vmov r0, r1, d16
 ; ARM78-NEXT:    vmov r2, r3, d17
 ; ARM78-NEXT:    bx lr
-; ARM78-NEXT:    .p2align 4
-; ARM78-NEXT:  @ %bb.1:
-; ARM78-NEXT:  .LCPI7_0:
-; ARM78-NEXT:    .long 1 @ 0x1
-; ARM78-NEXT:    .long 0 @ 0x0
-; ARM78-NEXT:    .long 1 @ 0x1
-; ARM78-NEXT:    .long 0 @ 0x0
 ;
 ; THUMB6-LABEL: vector_i128_i64:
 ; THUMB6:       @ %bb.0:
-; THUMB6-NEXT:    push {r4, r5, r6, lr}
-; THUMB6-NEXT:    ldr r4, [sp, #20]
+; THUMB6-NEXT:    push {r4, r5, r7, lr}
+; THUMB6-NEXT:    mvns r4, r1
+; THUMB6-NEXT:    mvns r0, r0
+; THUMB6-NEXT:    ldr r1, [sp, #20]
 ; THUMB6-NEXT:    ldr r5, [sp, #16]
-; THUMB6-NEXT:    adds r0, r0, r5
-; THUMB6-NEXT:    adcs r1, r4
-; THUMB6-NEXT:    movs r4, #0
-; THUMB6-NEXT:    adds r0, r0, #1
-; THUMB6-NEXT:    adcs r1, r4
-; THUMB6-NEXT:    ldr r5, [sp, #28]
-; THUMB6-NEXT:    ldr r6, [sp, #24]
-; THUMB6-NEXT:    adds r2, r2, r6
-; THUMB6-NEXT:    adcs r3, r5
-; THUMB6-NEXT:    adds r2, r2, #1
-; THUMB6-NEXT:    adcs r3, r4
-; THUMB6-NEXT:    pop {r4, r5, r6, pc}
+; THUMB6-NEXT:    subs r0, r5, r0
+; THUMB6-NEXT:    sbcs r1, r4
+; THUMB6-NEXT:    mvns r4, r3
+; THUMB6-NEXT:    mvns r2, r2
+; THUMB6-NEXT:    ldr r3, [sp, #28]
+; THUMB6-NEXT:    ldr r5, [sp, #24]
+; THUMB6-NEXT:    subs r2, r5, r2
+; THUMB6-NEXT:    sbcs r3, r4
+; THUMB6-NEXT:    pop {r4, r5, r7, pc}
 ;
 ; THUMB78-LABEL: vector_i128_i64:
 ; THUMB78:       @ %bb.0:
 ; THUMB78-NEXT:    vmov d17, r2, r3
 ; THUMB78-NEXT:    vmov d16, r0, r1
 ; THUMB78-NEXT:    mov r0, sp
+; THUMB78-NEXT:    vmvn q8, q8
 ; THUMB78-NEXT:    vld1.64 {d18, d19}, [r0]
-; THUMB78-NEXT:    adr r0, .LCPI7_0
-; THUMB78-NEXT:    vadd.i64 q8, q8, q9
-; THUMB78-NEXT:    vld1.64 {d18, d19}, [r0:128]
-; THUMB78-NEXT:    vadd.i64 q8, q8, q9
+; THUMB78-NEXT:    vsub.i64 q8, q9, q8
 ; THUMB78-NEXT:    vmov r0, r1, d16
 ; THUMB78-NEXT:    vmov r2, r3, d17
 ; THUMB78-NEXT:    bx lr
-; THUMB78-NEXT:    .p2align 4
-; THUMB78-NEXT:  @ %bb.1:
-; THUMB78-NEXT:  .LCPI7_0:
-; THUMB78-NEXT:    .long 1 @ 0x1
-; THUMB78-NEXT:    .long 0 @ 0x0
-; THUMB78-NEXT:    .long 1 @ 0x1
-; THUMB78-NEXT:    .long 0 @ 0x0
   %t0 = add <2 x i64> %x, <i64 1, i64 1>
   %t1 = add <2 x i64> %y, %t0
   ret <2 x i64> %t1

Modified: llvm/trunk/test/CodeGen/ARM/sub-of-not.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/sub-of-not.ll?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/sub-of-not.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/sub-of-not.ll Wed Jul  3 02:41:35 2019
@@ -14,15 +14,21 @@
 define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
 ; ARM-LABEL: scalar_i8:
 ; ARM:       @ %bb.0:
-; ARM-NEXT:    mvn r0, r0
-; ARM-NEXT:    sub r0, r1, r0
+; ARM-NEXT:    add r0, r1, r0
+; ARM-NEXT:    add r0, r0, #1
 ; ARM-NEXT:    bx lr
 ;
-; THUMB-LABEL: scalar_i8:
-; THUMB:       @ %bb.0:
-; THUMB-NEXT:    mvns r0, r0
-; THUMB-NEXT:    subs r0, r1, r0
-; THUMB-NEXT:    bx lr
+; THUMB6-LABEL: scalar_i8:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    adds r0, r1, r0
+; THUMB6-NEXT:    adds r0, r0, #1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i8:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    add r0, r1
+; THUMB78-NEXT:    adds r0, #1
+; THUMB78-NEXT:    bx lr
   %t0 = xor i8 %x, -1
   %t1 = sub i8 %y, %t0
   ret i8 %t1
@@ -31,15 +37,21 @@ define i8 @scalar_i8(i8 %x, i8 %y) nounw
 define i16 @scalar_i16(i16 %x, i16 %y) nounwind {
 ; ARM-LABEL: scalar_i16:
 ; ARM:       @ %bb.0:
-; ARM-NEXT:    mvn r0, r0
-; ARM-NEXT:    sub r0, r1, r0
+; ARM-NEXT:    add r0, r1, r0
+; ARM-NEXT:    add r0, r0, #1
 ; ARM-NEXT:    bx lr
 ;
-; THUMB-LABEL: scalar_i16:
-; THUMB:       @ %bb.0:
-; THUMB-NEXT:    mvns r0, r0
-; THUMB-NEXT:    subs r0, r1, r0
-; THUMB-NEXT:    bx lr
+; THUMB6-LABEL: scalar_i16:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    adds r0, r1, r0
+; THUMB6-NEXT:    adds r0, r0, #1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i16:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    add r0, r1
+; THUMB78-NEXT:    adds r0, #1
+; THUMB78-NEXT:    bx lr
   %t0 = xor i16 %x, -1
   %t1 = sub i16 %y, %t0
   ret i16 %t1
@@ -48,15 +60,21 @@ define i16 @scalar_i16(i16 %x, i16 %y) n
 define i32 @scalar_i32(i32 %x, i32 %y) nounwind {
 ; ARM-LABEL: scalar_i32:
 ; ARM:       @ %bb.0:
-; ARM-NEXT:    mvn r0, r0
-; ARM-NEXT:    sub r0, r1, r0
+; ARM-NEXT:    add r0, r1, r0
+; ARM-NEXT:    add r0, r0, #1
 ; ARM-NEXT:    bx lr
 ;
-; THUMB-LABEL: scalar_i32:
-; THUMB:       @ %bb.0:
-; THUMB-NEXT:    mvns r0, r0
-; THUMB-NEXT:    subs r0, r1, r0
-; THUMB-NEXT:    bx lr
+; THUMB6-LABEL: scalar_i32:
+; THUMB6:       @ %bb.0:
+; THUMB6-NEXT:    adds r0, r1, r0
+; THUMB6-NEXT:    adds r0, r0, #1
+; THUMB6-NEXT:    bx lr
+;
+; THUMB78-LABEL: scalar_i32:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    add r0, r1
+; THUMB78-NEXT:    adds r0, #1
+; THUMB78-NEXT:    bx lr
   %t0 = xor i32 %x, -1
   %t1 = sub i32 %y, %t0
   ret i32 %t1
@@ -65,10 +83,10 @@ define i32 @scalar_i32(i32 %x, i32 %y) n
 define i64 @scalar_i64(i64 %x, i64 %y) nounwind {
 ; ARM-LABEL: scalar_i64:
 ; ARM:       @ %bb.0:
-; ARM-NEXT:    mvn r0, r0
-; ARM-NEXT:    mvn r1, r1
-; ARM-NEXT:    subs r0, r2, r0
-; ARM-NEXT:    sbc r1, r3, r1
+; ARM-NEXT:    adds r0, r2, r0
+; ARM-NEXT:    adc r1, r3, r1
+; ARM-NEXT:    adds r0, r0, #1
+; ARM-NEXT:    adc r1, r1, #0
 ; ARM-NEXT:    bx lr
 ;
 ; THUMB6-LABEL: scalar_i64:
@@ -80,21 +98,13 @@ define i64 @scalar_i64(i64 %x, i64 %y) n
 ; THUMB6-NEXT:    mov r1, r3
 ; THUMB6-NEXT:    bx lr
 ;
-; THUMB7-LABEL: scalar_i64:
-; THUMB7:       @ %bb.0:
-; THUMB7-NEXT:    mvns r0, r0
-; THUMB7-NEXT:    mvns r1, r1
-; THUMB7-NEXT:    subs r0, r2, r0
-; THUMB7-NEXT:    sbc.w r1, r3, r1
-; THUMB7-NEXT:    bx lr
-;
-; THUMB8-LABEL: scalar_i64:
-; THUMB8:       @ %bb.0:
-; THUMB8-NEXT:    mvns r1, r1
-; THUMB8-NEXT:    mvns r0, r0
-; THUMB8-NEXT:    subs r0, r2, r0
-; THUMB8-NEXT:    sbc.w r1, r3, r1
-; THUMB8-NEXT:    bx lr
+; THUMB78-LABEL: scalar_i64:
+; THUMB78:       @ %bb.0:
+; THUMB78-NEXT:    adds r0, r0, r2
+; THUMB78-NEXT:    adcs r1, r3
+; THUMB78-NEXT:    adds r0, #1
+; THUMB78-NEXT:    adc r1, r1, #0
+; THUMB78-NEXT:    bx lr
   %t0 = xor i64 %x, -1
   %t1 = sub i64 %y, %t0
   ret i64 %t1
@@ -103,83 +113,83 @@ define i64 @scalar_i64(i64 %x, i64 %y) n
 define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; ARM6-LABEL: vector_i128_i8:
 ; ARM6:       @ %bb.0:
-; ARM6-NEXT:    ldrb r1, [sp, #52]
-; ARM6-NEXT:    mvn r12, r1
+; ARM6-NEXT:    ldrb r12, [sp, #52]
 ; ARM6-NEXT:    ldrb r1, [sp, #116]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #48]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #15]
-; ARM6-NEXT:    ldrb r1, [sp, #48]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #112]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #44]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #14]
-; ARM6-NEXT:    ldrb r1, [sp, #44]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #108]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #40]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #13]
-; ARM6-NEXT:    ldrb r1, [sp, #40]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #104]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #36]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #12]
-; ARM6-NEXT:    ldrb r1, [sp, #36]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #100]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #32]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #11]
-; ARM6-NEXT:    ldrb r1, [sp, #32]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #96]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #28]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #10]
-; ARM6-NEXT:    ldrb r1, [sp, #28]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #92]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #24]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #9]
-; ARM6-NEXT:    ldrb r1, [sp, #24]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #88]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #20]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #8]
-; ARM6-NEXT:    ldrb r1, [sp, #20]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #84]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #16]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #7]
-; ARM6-NEXT:    ldrb r1, [sp, #16]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #80]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #12]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #6]
-; ARM6-NEXT:    ldrb r1, [sp, #12]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #76]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #8]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #5]
-; ARM6-NEXT:    ldrb r1, [sp, #8]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #72]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp, #4]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #4]
-; ARM6-NEXT:    ldrb r1, [sp, #4]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #68]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrb r12, [sp]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #3]
-; ARM6-NEXT:    ldrb r1, [sp]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrb r1, [sp, #64]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #2]
-; ARM6-NEXT:    mvn r1, r3
-; ARM6-NEXT:    ldrb r3, [sp, #60]
-; ARM6-NEXT:    sub r1, r3, r1
+; ARM6-NEXT:    ldrb r1, [sp, #60]
+; ARM6-NEXT:    add r1, r1, r3
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0, #1]
-; ARM6-NEXT:    mvn r1, r2
-; ARM6-NEXT:    ldrb r2, [sp, #56]
-; ARM6-NEXT:    sub r1, r2, r1
+; ARM6-NEXT:    ldrb r1, [sp, #56]
+; ARM6-NEXT:    add r1, r1, r2
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strb r1, [r0]
 ; ARM6-NEXT:    bx lr
 ;
@@ -199,82 +209,82 @@ define <16 x i8> @vector_i128_i8(<16 x i
 ; THUMB6:       @ %bb.0:
 ; THUMB6-NEXT:    push {r4, lr}
 ; THUMB6-NEXT:    ldr r1, [sp, #60]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #124]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #15]
 ; THUMB6-NEXT:    ldr r1, [sp, #56]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #120]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #14]
 ; THUMB6-NEXT:    ldr r1, [sp, #52]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #116]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #13]
 ; THUMB6-NEXT:    ldr r1, [sp, #48]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #112]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #12]
 ; THUMB6-NEXT:    ldr r1, [sp, #44]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #108]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #11]
 ; THUMB6-NEXT:    ldr r1, [sp, #40]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #104]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #10]
 ; THUMB6-NEXT:    ldr r1, [sp, #36]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #100]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #9]
 ; THUMB6-NEXT:    ldr r1, [sp, #32]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #96]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #8]
 ; THUMB6-NEXT:    ldr r1, [sp, #28]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #92]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #7]
 ; THUMB6-NEXT:    ldr r1, [sp, #24]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #88]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #6]
 ; THUMB6-NEXT:    ldr r1, [sp, #20]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #84]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #5]
 ; THUMB6-NEXT:    ldr r1, [sp, #16]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #80]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #4]
 ; THUMB6-NEXT:    ldr r1, [sp, #12]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #76]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #3]
 ; THUMB6-NEXT:    ldr r1, [sp, #8]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #72]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #2]
-; THUMB6-NEXT:    mvns r1, r3
-; THUMB6-NEXT:    ldr r3, [sp, #68]
-; THUMB6-NEXT:    subs r1, r3, r1
+; THUMB6-NEXT:    ldr r1, [sp, #68]
+; THUMB6-NEXT:    adds r1, r1, r3
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0, #1]
-; THUMB6-NEXT:    mvns r1, r2
-; THUMB6-NEXT:    ldr r2, [sp, #64]
-; THUMB6-NEXT:    subs r1, r2, r1
+; THUMB6-NEXT:    ldr r1, [sp, #64]
+; THUMB6-NEXT:    adds r1, r1, r2
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strb r1, [r0]
 ; THUMB6-NEXT:    pop {r4, pc}
 ;
@@ -297,43 +307,43 @@ define <16 x i8> @vector_i128_i8(<16 x i
 define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; ARM6-LABEL: vector_i128_i16:
 ; ARM6:       @ %bb.0:
-; ARM6-NEXT:    ldrh r1, [sp, #20]
-; ARM6-NEXT:    mvn r12, r1
+; ARM6-NEXT:    ldrh r12, [sp, #20]
 ; ARM6-NEXT:    ldrh r1, [sp, #52]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrh r12, [sp, #16]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strh r1, [r0, #14]
-; ARM6-NEXT:    ldrh r1, [sp, #16]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrh r1, [sp, #48]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrh r12, [sp, #12]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strh r1, [r0, #12]
-; ARM6-NEXT:    ldrh r1, [sp, #12]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrh r1, [sp, #44]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrh r12, [sp, #8]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strh r1, [r0, #10]
-; ARM6-NEXT:    ldrh r1, [sp, #8]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrh r1, [sp, #40]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrh r12, [sp, #4]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strh r1, [r0, #8]
-; ARM6-NEXT:    ldrh r1, [sp, #4]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrh r1, [sp, #36]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    ldrh r12, [sp]
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strh r1, [r0, #6]
-; ARM6-NEXT:    ldrh r1, [sp]
-; ARM6-NEXT:    mvn r12, r1
 ; ARM6-NEXT:    ldrh r1, [sp, #32]
-; ARM6-NEXT:    sub r1, r1, r12
+; ARM6-NEXT:    add r1, r1, r12
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strh r1, [r0, #4]
-; ARM6-NEXT:    mvn r1, r3
-; ARM6-NEXT:    ldrh r3, [sp, #28]
-; ARM6-NEXT:    sub r1, r3, r1
+; ARM6-NEXT:    ldrh r1, [sp, #28]
+; ARM6-NEXT:    add r1, r1, r3
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strh r1, [r0, #2]
-; ARM6-NEXT:    mvn r1, r2
-; ARM6-NEXT:    ldrh r2, [sp, #24]
-; ARM6-NEXT:    sub r1, r2, r1
+; ARM6-NEXT:    ldrh r1, [sp, #24]
+; ARM6-NEXT:    add r1, r1, r2
+; ARM6-NEXT:    add r1, r1, #1
 ; ARM6-NEXT:    strh r1, [r0]
 ; ARM6-NEXT:    bx lr
 ;
@@ -353,42 +363,42 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ; THUMB6:       @ %bb.0:
 ; THUMB6-NEXT:    push {r4, lr}
 ; THUMB6-NEXT:    ldr r1, [sp, #28]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #60]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strh r1, [r0, #14]
 ; THUMB6-NEXT:    ldr r1, [sp, #24]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #56]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strh r1, [r0, #12]
 ; THUMB6-NEXT:    ldr r1, [sp, #20]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #52]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strh r1, [r0, #10]
 ; THUMB6-NEXT:    ldr r1, [sp, #16]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #48]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strh r1, [r0, #8]
 ; THUMB6-NEXT:    ldr r1, [sp, #12]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #44]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strh r1, [r0, #6]
 ; THUMB6-NEXT:    ldr r1, [sp, #8]
-; THUMB6-NEXT:    mvns r1, r1
 ; THUMB6-NEXT:    ldr r4, [sp, #40]
-; THUMB6-NEXT:    subs r1, r4, r1
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strh r1, [r0, #4]
-; THUMB6-NEXT:    mvns r1, r3
-; THUMB6-NEXT:    ldr r3, [sp, #36]
-; THUMB6-NEXT:    subs r1, r3, r1
+; THUMB6-NEXT:    ldr r1, [sp, #36]
+; THUMB6-NEXT:    adds r1, r1, r3
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strh r1, [r0, #2]
-; THUMB6-NEXT:    mvns r1, r2
-; THUMB6-NEXT:    ldr r2, [sp, #32]
-; THUMB6-NEXT:    subs r1, r2, r1
+; THUMB6-NEXT:    ldr r1, [sp, #32]
+; THUMB6-NEXT:    adds r1, r1, r2
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    strh r1, [r0]
 ; THUMB6-NEXT:    pop {r4, pc}
 ;
@@ -411,18 +421,18 @@ define <8 x i16> @vector_i128_i16(<8 x i
 define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; ARM6-LABEL: vector_i128_i32:
 ; ARM6:       @ %bb.0:
-; ARM6-NEXT:    mvn r12, r0
-; ARM6-NEXT:    ldr r0, [sp]
-; ARM6-NEXT:    sub r0, r0, r12
-; ARM6-NEXT:    mvn r12, r1
-; ARM6-NEXT:    ldr r1, [sp, #4]
-; ARM6-NEXT:    sub r1, r1, r12
-; ARM6-NEXT:    mvn r12, r2
-; ARM6-NEXT:    ldr r2, [sp, #8]
-; ARM6-NEXT:    sub r2, r2, r12
-; ARM6-NEXT:    mvn r12, r3
-; ARM6-NEXT:    ldr r3, [sp, #12]
-; ARM6-NEXT:    sub r3, r3, r12
+; ARM6-NEXT:    ldr r12, [sp]
+; ARM6-NEXT:    add r0, r12, r0
+; ARM6-NEXT:    ldr r12, [sp, #4]
+; ARM6-NEXT:    add r0, r0, #1
+; ARM6-NEXT:    add r1, r12, r1
+; ARM6-NEXT:    ldr r12, [sp, #8]
+; ARM6-NEXT:    add r1, r1, #1
+; ARM6-NEXT:    add r2, r12, r2
+; ARM6-NEXT:    ldr r12, [sp, #12]
+; ARM6-NEXT:    add r2, r2, #1
+; ARM6-NEXT:    add r3, r12, r3
+; ARM6-NEXT:    add r3, r3, #1
 ; ARM6-NEXT:    bx lr
 ;
 ; ARM78-LABEL: vector_i128_i32:
@@ -440,18 +450,18 @@ define <4 x i32> @vector_i128_i32(<4 x i
 ; THUMB6-LABEL: vector_i128_i32:
 ; THUMB6:       @ %bb.0:
 ; THUMB6-NEXT:    push {r4, lr}
-; THUMB6-NEXT:    mvns r0, r0
 ; THUMB6-NEXT:    ldr r4, [sp, #8]
-; THUMB6-NEXT:    subs r0, r4, r0
-; THUMB6-NEXT:    mvns r1, r1
+; THUMB6-NEXT:    adds r0, r4, r0
+; THUMB6-NEXT:    adds r0, r0, #1
 ; THUMB6-NEXT:    ldr r4, [sp, #12]
-; THUMB6-NEXT:    subs r1, r4, r1
-; THUMB6-NEXT:    mvns r2, r2
+; THUMB6-NEXT:    adds r1, r4, r1
+; THUMB6-NEXT:    adds r1, r1, #1
 ; THUMB6-NEXT:    ldr r4, [sp, #16]
-; THUMB6-NEXT:    subs r2, r4, r2
-; THUMB6-NEXT:    mvns r3, r3
+; THUMB6-NEXT:    adds r2, r4, r2
+; THUMB6-NEXT:    adds r2, r2, #1
 ; THUMB6-NEXT:    ldr r4, [sp, #20]
-; THUMB6-NEXT:    subs r3, r4, r3
+; THUMB6-NEXT:    adds r3, r4, r3
+; THUMB6-NEXT:    adds r3, r3, #1
 ; THUMB6-NEXT:    pop {r4, pc}
 ;
 ; THUMB78-LABEL: vector_i128_i32:
@@ -474,18 +484,18 @@ define <2 x i64> @vector_i128_i64(<2 x i
 ; ARM6-LABEL: vector_i128_i64:
 ; ARM6:       @ %bb.0:
 ; ARM6-NEXT:    push {r11, lr}
-; ARM6-NEXT:    mvn lr, r1
-; ARM6-NEXT:    ldr r1, [sp, #8]
-; ARM6-NEXT:    mvn r0, r0
+; ARM6-NEXT:    ldr lr, [sp, #8]
 ; ARM6-NEXT:    ldr r12, [sp, #12]
-; ARM6-NEXT:    subs r0, r1, r0
-; ARM6-NEXT:    mvn r2, r2
-; ARM6-NEXT:    sbc r1, r12, lr
-; ARM6-NEXT:    mvn lr, r3
-; ARM6-NEXT:    ldr r3, [sp, #16]
+; ARM6-NEXT:    adds r0, lr, r0
+; ARM6-NEXT:    ldr lr, [sp, #16]
+; ARM6-NEXT:    adc r1, r12, r1
+; ARM6-NEXT:    adds r0, r0, #1
 ; ARM6-NEXT:    ldr r12, [sp, #20]
-; ARM6-NEXT:    subs r2, r3, r2
-; ARM6-NEXT:    sbc r3, r12, lr
+; ARM6-NEXT:    adc r1, r1, #0
+; ARM6-NEXT:    adds r2, lr, r2
+; ARM6-NEXT:    adc r3, r12, r3
+; ARM6-NEXT:    adds r2, r2, #1
+; ARM6-NEXT:    adc r3, r3, #0
 ; ARM6-NEXT:    pop {r11, pc}
 ;
 ; ARM78-LABEL: vector_i128_i64:

Modified: llvm/trunk/test/CodeGen/PowerPC/inc-of-add.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/inc-of-add.ll?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/inc-of-add.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/inc-of-add.ll Wed Jul  3 02:41:35 2019
@@ -64,89 +64,89 @@ define <16 x i8> @vector_i128_i8(<16 x i
 ; PPC32-LABEL: vector_i128_i8:
 ; PPC32:       # %bb.0:
 ; PPC32-NEXT:    stwu 1, -64(1)
-; PPC32-NEXT:    lbz 12, 175(1)
-; PPC32-NEXT:    lbz 0, 111(1)
-; PPC32-NEXT:    lbz 4, 171(1)
-; PPC32-NEXT:    lbz 11, 107(1)
 ; PPC32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
-; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 12, 0, 12
-; PPC32-NEXT:    lbz 22, 167(1)
-; PPC32-NEXT:    lbz 21, 103(1)
-; PPC32-NEXT:    lbz 23, 163(1)
-; PPC32-NEXT:    lbz 0, 99(1)
+; PPC32-NEXT:    lbz 4, 119(1)
+; PPC32-NEXT:    lbz 11, 115(1)
 ; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 4, 11, 4
-; PPC32-NEXT:    lbz 24, 159(1)
-; PPC32-NEXT:    lbz 11, 95(1)
-; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 22, 21, 22
-; PPC32-NEXT:    lbz 25, 155(1)
-; PPC32-NEXT:    lbz 21, 91(1)
-; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 23, 0, 23
-; PPC32-NEXT:    lbz 26, 151(1)
-; PPC32-NEXT:    lbz 0, 87(1)
-; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 11, 11, 24
-; PPC32-NEXT:    lbz 27, 147(1)
+; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 4, 4, 6
+; PPC32-NEXT:    lbz 21, 123(1)
+; PPC32-NEXT:    lbz 6, 131(1)
+; PPC32-NEXT:    add 5, 11, 5
+; PPC32-NEXT:    lbz 11, 127(1)
+; PPC32-NEXT:    add 7, 21, 7
+; PPC32-NEXT:    lbz 21, 135(1)
 ; PPC32-NEXT:    lbz 24, 83(1)
-; PPC32-NEXT:    addi 4, 4, 1
-; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
-; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
+; PPC32-NEXT:    lbz 23, 79(1)
+; PPC32-NEXT:    add 6, 6, 9
+; PPC32-NEXT:    add 10, 21, 10
+; PPC32-NEXT:    lbz 21, 147(1)
+; PPC32-NEXT:    lbz 9, 143(1)
+; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 8, 11, 8
+; PPC32-NEXT:    lbz 22, 75(1)
+; PPC32-NEXT:    lbz 11, 139(1)
+; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 24, 21, 24
+; PPC32-NEXT:    lbz 27, 95(1)
+; PPC32-NEXT:    lbz 21, 159(1)
+; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 9, 9, 23
+; PPC32-NEXT:    lbz 26, 91(1)
+; PPC32-NEXT:    lbz 23, 155(1)
+; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 11, 11, 22
+; PPC32-NEXT:    lbz 25, 87(1)
+; PPC32-NEXT:    lbz 22, 151(1)
+; PPC32-NEXT:    lbz 12, 111(1)
+; PPC32-NEXT:    add 27, 21, 27
+; PPC32-NEXT:    lbz 21, 175(1)
 ; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
-; PPC32-NEXT:    add 25, 21, 25
-; PPC32-NEXT:    lbz 28, 143(1)
-; PPC32-NEXT:    lbz 21, 79(1)
-; PPC32-NEXT:    stb 4, 14(3)
-; PPC32-NEXT:    addi 4, 22, 1
-; PPC32-NEXT:    lbz 29, 139(1)
-; PPC32-NEXT:    add 26, 0, 26
-; PPC32-NEXT:    lbz 0, 75(1)
-; PPC32-NEXT:    stb 4, 13(3)
-; PPC32-NEXT:    addi 4, 23, 1
-; PPC32-NEXT:    add 27, 24, 27
-; PPC32-NEXT:    lbz 24, 135(1)
-; PPC32-NEXT:    stb 4, 12(3)
-; PPC32-NEXT:    addi 4, 11, 1
-; PPC32-NEXT:    stb 4, 11(3)
-; PPC32-NEXT:    addi 4, 25, 1
-; PPC32-NEXT:    add 28, 21, 28
-; PPC32-NEXT:    lbz 21, 131(1)
-; PPC32-NEXT:    stb 4, 10(3)
-; PPC32-NEXT:    addi 4, 26, 1
-; PPC32-NEXT:    add 29, 0, 29
-; PPC32-NEXT:    lbz 0, 127(1)
-; PPC32-NEXT:    stb 4, 9(3)
-; PPC32-NEXT:    addi 4, 27, 1
-; PPC32-NEXT:    add 10, 10, 24
-; PPC32-NEXT:    lbz 24, 123(1)
-; PPC32-NEXT:    stb 4, 8(3)
-; PPC32-NEXT:    addi 4, 28, 1
-; PPC32-NEXT:    lbz 30, 119(1)
-; PPC32-NEXT:    stb 4, 7(3)
-; PPC32-NEXT:    addi 4, 29, 1
-; PPC32-NEXT:    add 9, 9, 21
-; PPC32-NEXT:    lbz 21, 115(1)
-; PPC32-NEXT:    stb 4, 6(3)
-; PPC32-NEXT:    addi 4, 10, 1
-; PPC32-NEXT:    add 8, 8, 0
-; PPC32-NEXT:    stb 4, 5(3)
-; PPC32-NEXT:    addi 4, 9, 1
-; PPC32-NEXT:    add 7, 7, 24
-; PPC32-NEXT:    stb 4, 4(3)
-; PPC32-NEXT:    addi 4, 8, 1
-; PPC32-NEXT:    add 6, 6, 30
-; PPC32-NEXT:    stb 4, 3(3)
-; PPC32-NEXT:    addi 4, 7, 1
-; PPC32-NEXT:    add 5, 5, 21
-; PPC32-NEXT:    stb 4, 2(3)
-; PPC32-NEXT:    addi 4, 6, 1
+; PPC32-NEXT:    lbz 0, 107(1)
+; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 26, 23, 26
+; PPC32-NEXT:    lbz 30, 171(1)
+; PPC32-NEXT:    lbz 29, 103(1)
+; PPC32-NEXT:    lbz 23, 167(1)
+; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 25, 22, 25
+; PPC32-NEXT:    lbz 28, 99(1)
+; PPC32-NEXT:    lbz 22, 163(1)
+; PPC32-NEXT:    add 12, 21, 12
+; PPC32-NEXT:    add 30, 30, 0
 ; PPC32-NEXT:    addi 12, 12, 1
+; PPC32-NEXT:    add 29, 23, 29
+; PPC32-NEXT:    stb 12, 15(3)
+; PPC32-NEXT:    addi 12, 30, 1
+; PPC32-NEXT:    add 28, 22, 28
+; PPC32-NEXT:    stb 12, 14(3)
+; PPC32-NEXT:    addi 12, 29, 1
+; PPC32-NEXT:    stb 12, 13(3)
+; PPC32-NEXT:    addi 12, 28, 1
+; PPC32-NEXT:    stb 12, 12(3)
+; PPC32-NEXT:    addi 12, 27, 1
+; PPC32-NEXT:    stb 12, 11(3)
+; PPC32-NEXT:    addi 12, 26, 1
+; PPC32-NEXT:    addi 9, 9, 1
+; PPC32-NEXT:    addi 6, 6, 1
+; PPC32-NEXT:    stb 12, 10(3)
+; PPC32-NEXT:    addi 12, 25, 1
+; PPC32-NEXT:    stb 9, 7(3)
+; PPC32-NEXT:    addi 9, 11, 1
+; PPC32-NEXT:    stb 6, 4(3)
+; PPC32-NEXT:    addi 6, 8, 1
+; PPC32-NEXT:    addi 4, 4, 1
+; PPC32-NEXT:    stb 12, 9(3)
+; PPC32-NEXT:    addi 12, 24, 1
+; PPC32-NEXT:    stb 9, 6(3)
+; PPC32-NEXT:    addi 9, 10, 1
+; PPC32-NEXT:    stb 6, 3(3)
+; PPC32-NEXT:    addi 6, 7, 1
 ; PPC32-NEXT:    stb 4, 1(3)
 ; PPC32-NEXT:    addi 4, 5, 1
-; PPC32-NEXT:    stb 12, 15(3)
+; PPC32-NEXT:    stb 12, 8(3)
+; PPC32-NEXT:    stb 9, 5(3)
+; PPC32-NEXT:    stb 6, 2(3)
 ; PPC32-NEXT:    stb 4, 0(3)
 ; PPC32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
@@ -164,73 +164,72 @@ define <16 x i8> @vector_i128_i8(<16 x i
 ; PPC64BE-LABEL: vector_i128_i8:
 ; PPC64BE:       # %bb.0:
 ; PPC64BE-NEXT:    std 21, -88(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    lbz 21, 207(1)
+; PPC64BE-NEXT:    lbz 11, 199(1)
+; PPC64BE-NEXT:    lbz 12, 191(1)
+; PPC64BE-NEXT:    std 23, -72(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 22, -80(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 2, -96(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 25, -56(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 24, -64(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lbz 11, 303(1)
-; PPC64BE-NEXT:    lbz 12, 175(1)
-; PPC64BE-NEXT:    lbz 0, 311(1)
-; PPC64BE-NEXT:    lbz 30, 183(1)
+; PPC64BE-NEXT:    lbz 0, 183(1)
+; PPC64BE-NEXT:    add 6, 21, 6
+; PPC64BE-NEXT:    lbz 21, 231(1)
+; PPC64BE-NEXT:    add 5, 11, 5
+; PPC64BE-NEXT:    lbz 11, 223(1)
+; PPC64BE-NEXT:    add 4, 12, 4
+; PPC64BE-NEXT:    lbz 12, 215(1)
+; PPC64BE-NEXT:    lbz 23, 127(1)
+; PPC64BE-NEXT:    add 9, 21, 9
+; PPC64BE-NEXT:    lbz 21, 255(1)
+; PPC64BE-NEXT:    lbz 22, 119(1)
+; PPC64BE-NEXT:    add 8, 11, 8
+; PPC64BE-NEXT:    lbz 11, 247(1)
+; PPC64BE-NEXT:    add 7, 12, 7
+; PPC64BE-NEXT:    lbz 12, 239(1)
+; PPC64BE-NEXT:    lbz 26, 151(1)
+; PPC64BE-NEXT:    add 2, 21, 23
+; PPC64BE-NEXT:    lbz 23, 279(1)
+; PPC64BE-NEXT:    lbz 25, 143(1)
+; PPC64BE-NEXT:    add 11, 11, 22
+; PPC64BE-NEXT:    lbz 22, 271(1)
+; PPC64BE-NEXT:    lbz 24, 135(1)
+; PPC64BE-NEXT:    add 10, 12, 10
+; PPC64BE-NEXT:    lbz 12, 263(1)
+; PPC64BE-NEXT:    lbz 30, 175(1)
+; PPC64BE-NEXT:    lbz 29, 303(1)
+; PPC64BE-NEXT:    add 26, 23, 26
+; PPC64BE-NEXT:    lbz 23, 311(1)
+; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    add 25, 22, 25
+; PPC64BE-NEXT:    lbz 28, 167(1)
 ; PPC64BE-NEXT:    lbz 22, 295(1)
-; PPC64BE-NEXT:    lbz 21, 167(1)
-; PPC64BE-NEXT:    std 23, -72(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 24, -64(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 25, -56(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 2, -96(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    add 11, 12, 11
-; PPC64BE-NEXT:    lbz 25, 271(1)
-; PPC64BE-NEXT:    lbz 24, 279(1)
-; PPC64BE-NEXT:    lbz 23, 287(1)
-; PPC64BE-NEXT:    add 2, 30, 0
-; PPC64BE-NEXT:    lbz 0, 159(1)
-; PPC64BE-NEXT:    lbz 12, 151(1)
-; PPC64BE-NEXT:    add 30, 21, 22
-; PPC64BE-NEXT:    lbz 22, 143(1)
-; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    add 23, 0, 23
-; PPC64BE-NEXT:    lbz 28, 247(1)
-; PPC64BE-NEXT:    lbz 27, 255(1)
-; PPC64BE-NEXT:    lbz 26, 263(1)
-; PPC64BE-NEXT:    lbz 0, 135(1)
 ; PPC64BE-NEXT:    add 12, 12, 24
-; PPC64BE-NEXT:    lbz 24, 127(1)
-; PPC64BE-NEXT:    add 25, 22, 25
-; PPC64BE-NEXT:    lbz 22, 119(1)
-; PPC64BE-NEXT:    add 26, 0, 26
-; PPC64BE-NEXT:    lbz 0, 239(1)
-; PPC64BE-NEXT:    add 27, 24, 27
-; PPC64BE-NEXT:    lbz 24, 231(1)
+; PPC64BE-NEXT:    lbz 27, 159(1)
+; PPC64BE-NEXT:    lbz 24, 287(1)
+; PPC64BE-NEXT:    add 30, 29, 30
+; PPC64BE-NEXT:    add 29, 23, 0
+; PPC64BE-NEXT:    addi 0, 29, 1
 ; PPC64BE-NEXT:    add 28, 22, 28
-; PPC64BE-NEXT:    lbz 22, 223(1)
+; PPC64BE-NEXT:    stb 0, 15(3)
+; PPC64BE-NEXT:    addi 0, 30, 1
+; PPC64BE-NEXT:    add 27, 24, 27
+; PPC64BE-NEXT:    stb 0, 14(3)
+; PPC64BE-NEXT:    addi 0, 28, 1
+; PPC64BE-NEXT:    stb 0, 13(3)
+; PPC64BE-NEXT:    addi 0, 27, 1
+; PPC64BE-NEXT:    stb 0, 12(3)
+; PPC64BE-NEXT:    addi 0, 26, 1
+; PPC64BE-NEXT:    addi 12, 12, 1
+; PPC64BE-NEXT:    stb 0, 11(3)
+; PPC64BE-NEXT:    addi 0, 25, 1
+; PPC64BE-NEXT:    stb 12, 9(3)
+; PPC64BE-NEXT:    addi 12, 2, 1
 ; PPC64BE-NEXT:    addi 11, 11, 1
-; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    stb 11, 14(3)
-; PPC64BE-NEXT:    addi 11, 30, 1
-; PPC64BE-NEXT:    lbz 29, 199(1)
-; PPC64BE-NEXT:    add 10, 10, 0
-; PPC64BE-NEXT:    lbz 0, 215(1)
-; PPC64BE-NEXT:    add 9, 9, 24
-; PPC64BE-NEXT:    lbz 24, 207(1)
-; PPC64BE-NEXT:    add 8, 8, 22
-; PPC64BE-NEXT:    lbz 22, 191(1)
-; PPC64BE-NEXT:    stb 11, 13(3)
-; PPC64BE-NEXT:    addi 11, 23, 1
-; PPC64BE-NEXT:    stb 11, 12(3)
-; PPC64BE-NEXT:    addi 11, 12, 1
-; PPC64BE-NEXT:    stb 11, 11(3)
-; PPC64BE-NEXT:    addi 11, 25, 1
-; PPC64BE-NEXT:    stb 11, 10(3)
-; PPC64BE-NEXT:    addi 11, 26, 1
-; PPC64BE-NEXT:    add 7, 7, 0
-; PPC64BE-NEXT:    add 6, 6, 24
-; PPC64BE-NEXT:    add 5, 5, 29
-; PPC64BE-NEXT:    add 4, 4, 22
-; PPC64BE-NEXT:    stb 11, 9(3)
-; PPC64BE-NEXT:    addi 11, 27, 1
-; PPC64BE-NEXT:    addi 0, 2, 1
-; PPC64BE-NEXT:    stb 11, 8(3)
-; PPC64BE-NEXT:    addi 11, 28, 1
 ; PPC64BE-NEXT:    addi 10, 10, 1
 ; PPC64BE-NEXT:    addi 9, 9, 1
 ; PPC64BE-NEXT:    addi 8, 8, 1
@@ -238,7 +237,8 @@ define <16 x i8> @vector_i128_i8(<16 x i
 ; PPC64BE-NEXT:    addi 6, 6, 1
 ; PPC64BE-NEXT:    addi 5, 5, 1
 ; PPC64BE-NEXT:    addi 4, 4, 1
-; PPC64BE-NEXT:    stb 0, 15(3)
+; PPC64BE-NEXT:    stb 0, 10(3)
+; PPC64BE-NEXT:    stb 12, 8(3)
 ; PPC64BE-NEXT:    stb 11, 7(3)
 ; PPC64BE-NEXT:    stb 10, 6(3)
 ; PPC64BE-NEXT:    stb 9, 5(3)
@@ -262,9 +262,8 @@ define <16 x i8> @vector_i128_i8(<16 x i
 ;
 ; PPC64LE-LABEL: vector_i128_i8:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    vspltisb 4, 1
-; PPC64LE-NEXT:    vaddubm 2, 2, 3
-; PPC64LE-NEXT:    vaddubm 2, 2, 4
+; PPC64LE-NEXT:    xxlnor 34, 34, 34
+; PPC64LE-NEXT:    vsububm 2, 3, 2
 ; PPC64LE-NEXT:    blr
   %t0 = add <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %t1 = add <16 x i8> %y, %t0
@@ -280,22 +279,22 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ; PPC32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lhz 11, 62(1)
-; PPC32-NEXT:    lhz 12, 66(1)
-; PPC32-NEXT:    lhz 0, 70(1)
-; PPC32-NEXT:    lhz 30, 42(1)
-; PPC32-NEXT:    lhz 29, 46(1)
-; PPC32-NEXT:    lhz 28, 50(1)
-; PPC32-NEXT:    lhz 27, 54(1)
-; PPC32-NEXT:    lhz 26, 58(1)
-; PPC32-NEXT:    add 10, 10, 0
-; PPC32-NEXT:    add 9, 9, 12
-; PPC32-NEXT:    add 8, 8, 11
-; PPC32-NEXT:    add 7, 7, 26
-; PPC32-NEXT:    add 6, 6, 27
-; PPC32-NEXT:    add 5, 5, 28
-; PPC32-NEXT:    add 4, 4, 29
-; PPC32-NEXT:    add 3, 3, 30
+; PPC32-NEXT:    lhz 11, 50(1)
+; PPC32-NEXT:    lhz 12, 46(1)
+; PPC32-NEXT:    lhz 0, 42(1)
+; PPC32-NEXT:    lhz 30, 70(1)
+; PPC32-NEXT:    lhz 29, 66(1)
+; PPC32-NEXT:    lhz 28, 62(1)
+; PPC32-NEXT:    lhz 27, 58(1)
+; PPC32-NEXT:    lhz 26, 54(1)
+; PPC32-NEXT:    add 3, 0, 3
+; PPC32-NEXT:    add 4, 12, 4
+; PPC32-NEXT:    add 5, 11, 5
+; PPC32-NEXT:    add 6, 26, 6
+; PPC32-NEXT:    add 7, 27, 7
+; PPC32-NEXT:    add 8, 28, 8
+; PPC32-NEXT:    add 9, 29, 9
+; PPC32-NEXT:    add 10, 30, 10
 ; PPC32-NEXT:    addi 3, 3, 1
 ; PPC32-NEXT:    addi 4, 4, 1
 ; PPC32-NEXT:    addi 5, 5, 1
@@ -320,24 +319,23 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lhz 11, 166(1)
-; PPC64BE-NEXT:    lhz 12, 174(1)
-; PPC64BE-NEXT:    lhz 0, 182(1)
+; PPC64BE-NEXT:    lhz 11, 142(1)
+; PPC64BE-NEXT:    lhz 12, 134(1)
+; PPC64BE-NEXT:    lhz 0, 126(1)
 ; PPC64BE-NEXT:    lhz 30, 118(1)
-; PPC64BE-NEXT:    lhz 29, 126(1)
-; PPC64BE-NEXT:    lhz 28, 134(1)
-; PPC64BE-NEXT:    lhz 27, 142(1)
-; PPC64BE-NEXT:    lhz 26, 150(1)
-; PPC64BE-NEXT:    lhz 25, 158(1)
-; PPC64BE-NEXT:    std 2, -64(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    add 2, 30, 0
-; PPC64BE-NEXT:    add 10, 10, 12
-; PPC64BE-NEXT:    add 9, 9, 11
-; PPC64BE-NEXT:    add 8, 8, 25
-; PPC64BE-NEXT:    add 7, 7, 26
-; PPC64BE-NEXT:    add 6, 6, 27
-; PPC64BE-NEXT:    add 5, 5, 28
-; PPC64BE-NEXT:    add 4, 4, 29
+; PPC64BE-NEXT:    lhz 29, 182(1)
+; PPC64BE-NEXT:    lhz 28, 174(1)
+; PPC64BE-NEXT:    lhz 27, 166(1)
+; PPC64BE-NEXT:    lhz 26, 158(1)
+; PPC64BE-NEXT:    lhz 25, 150(1)
+; PPC64BE-NEXT:    add 4, 0, 4
+; PPC64BE-NEXT:    add 5, 12, 5
+; PPC64BE-NEXT:    add 6, 11, 6
+; PPC64BE-NEXT:    add 7, 25, 7
+; PPC64BE-NEXT:    add 8, 26, 8
+; PPC64BE-NEXT:    add 9, 27, 9
+; PPC64BE-NEXT:    add 10, 28, 10
+; PPC64BE-NEXT:    add 11, 29, 30
 ; PPC64BE-NEXT:    addi 4, 4, 1
 ; PPC64BE-NEXT:    addi 5, 5, 1
 ; PPC64BE-NEXT:    addi 6, 6, 1
@@ -345,7 +343,7 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ; PPC64BE-NEXT:    addi 8, 8, 1
 ; PPC64BE-NEXT:    addi 9, 9, 1
 ; PPC64BE-NEXT:    addi 10, 10, 1
-; PPC64BE-NEXT:    addi 11, 2, 1
+; PPC64BE-NEXT:    addi 11, 11, 1
 ; PPC64BE-NEXT:    sth 11, 14(3)
 ; PPC64BE-NEXT:    sth 10, 12(3)
 ; PPC64BE-NEXT:    sth 9, 10(3)
@@ -354,7 +352,6 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ; PPC64BE-NEXT:    sth 6, 4(3)
 ; PPC64BE-NEXT:    sth 5, 2(3)
 ; PPC64BE-NEXT:    sth 4, 0(3)
-; PPC64BE-NEXT:    ld 2, -64(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
@@ -365,9 +362,8 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ;
 ; PPC64LE-LABEL: vector_i128_i16:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    vspltish 4, 1
-; PPC64LE-NEXT:    vadduhm 2, 2, 3
-; PPC64LE-NEXT:    vadduhm 2, 2, 4
+; PPC64LE-NEXT:    xxlnor 34, 34, 34
+; PPC64LE-NEXT:    vsubuhm 2, 3, 2
 ; PPC64LE-NEXT:    blr
   %t0 = add <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %t1 = add <8 x i16> %y, %t0
@@ -377,10 +373,10 @@ define <8 x i16> @vector_i128_i16(<8 x i
 define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; PPC32-LABEL: vector_i128_i32:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    add 6, 6, 10
-; PPC32-NEXT:    add 5, 5, 9
-; PPC32-NEXT:    add 4, 4, 8
-; PPC32-NEXT:    add 3, 3, 7
+; PPC32-NEXT:    add 3, 7, 3
+; PPC32-NEXT:    add 4, 8, 4
+; PPC32-NEXT:    add 5, 9, 5
+; PPC32-NEXT:    add 6, 10, 6
 ; PPC32-NEXT:    addi 3, 3, 1
 ; PPC32-NEXT:    addi 4, 4, 1
 ; PPC32-NEXT:    addi 5, 5, 1
@@ -389,10 +385,10 @@ define <4 x i32> @vector_i128_i32(<4 x i
 ;
 ; PPC64BE-LABEL: vector_i128_i32:
 ; PPC64BE:       # %bb.0:
-; PPC64BE-NEXT:    add 3, 3, 7
-; PPC64BE-NEXT:    add 4, 4, 8
-; PPC64BE-NEXT:    add 5, 5, 9
-; PPC64BE-NEXT:    add 6, 6, 10
+; PPC64BE-NEXT:    add 6, 10, 6
+; PPC64BE-NEXT:    add 5, 9, 5
+; PPC64BE-NEXT:    add 4, 8, 4
+; PPC64BE-NEXT:    add 3, 7, 3
 ; PPC64BE-NEXT:    addi 6, 6, 1
 ; PPC64BE-NEXT:    addi 5, 5, 1
 ; PPC64BE-NEXT:    addi 4, 4, 1
@@ -401,9 +397,8 @@ define <4 x i32> @vector_i128_i32(<4 x i
 ;
 ; PPC64LE-LABEL: vector_i128_i32:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    vspltisw 4, 1
-; PPC64LE-NEXT:    vadduwm 2, 2, 3
-; PPC64LE-NEXT:    vadduwm 2, 2, 4
+; PPC64LE-NEXT:    xxlnor 34, 34, 34
+; PPC64LE-NEXT:    vsubuwm 2, 3, 2
 ; PPC64LE-NEXT:    blr
   %t0 = add <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
   %t1 = add <4 x i32> %y, %t0
@@ -413,32 +408,28 @@ define <4 x i32> @vector_i128_i32(<4 x i
 define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; PPC32-LABEL: vector_i128_i64:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    addc 6, 6, 10
-; PPC32-NEXT:    adde 5, 5, 9
-; PPC32-NEXT:    addc 4, 4, 8
-; PPC32-NEXT:    adde 3, 3, 7
-; PPC32-NEXT:    addic 4, 4, 1
-; PPC32-NEXT:    addze 3, 3
-; PPC32-NEXT:    addic 6, 6, 1
-; PPC32-NEXT:    addze 5, 5
+; PPC32-NEXT:    nor 4, 4, 4
+; PPC32-NEXT:    nor 3, 3, 3
+; PPC32-NEXT:    subfc 4, 4, 8
+; PPC32-NEXT:    nor 6, 6, 6
+; PPC32-NEXT:    subfe 3, 3, 7
+; PPC32-NEXT:    nor 5, 5, 5
+; PPC32-NEXT:    subfc 6, 6, 10
+; PPC32-NEXT:    subfe 5, 5, 9
 ; PPC32-NEXT:    blr
 ;
 ; PPC64BE-LABEL: vector_i128_i64:
 ; PPC64BE:       # %bb.0:
-; PPC64BE-NEXT:    add 4, 4, 6
-; PPC64BE-NEXT:    add 3, 3, 5
+; PPC64BE-NEXT:    add 3, 5, 3
+; PPC64BE-NEXT:    add 4, 6, 4
 ; PPC64BE-NEXT:    addi 3, 3, 1
 ; PPC64BE-NEXT:    addi 4, 4, 1
 ; PPC64BE-NEXT:    blr
 ;
 ; PPC64LE-LABEL: vector_i128_i64:
 ; PPC64LE:       # %bb.0:
-; PPC64LE-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
-; PPC64LE-NEXT:    vaddudm 2, 2, 3
-; PPC64LE-NEXT:    addi 3, 3, .LCPI7_0@toc@l
-; PPC64LE-NEXT:    lxvd2x 0, 0, 3
-; PPC64LE-NEXT:    xxswapd 36, 0
-; PPC64LE-NEXT:    vaddudm 2, 2, 4
+; PPC64LE-NEXT:    xxlnor 34, 34, 34
+; PPC64LE-NEXT:    vsubudm 2, 3, 2
 ; PPC64LE-NEXT:    blr
   %t0 = add <2 x i64> %x, <i64 1, i64 1>
   %t1 = add <2 x i64> %y, %t0

Modified: llvm/trunk/test/CodeGen/PowerPC/sub-of-not.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/sub-of-not.ll?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/PowerPC/sub-of-not.ll (original)
+++ llvm/trunk/test/CodeGen/PowerPC/sub-of-not.ll Wed Jul  3 02:41:35 2019
@@ -11,8 +11,8 @@
 define i8 @scalar_i8(i8 %x, i8 %y) nounwind {
 ; ALL-LABEL: scalar_i8:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    nor 3, 3, 3
-; ALL-NEXT:    subf 3, 3, 4
+; ALL-NEXT:    add 3, 4, 3
+; ALL-NEXT:    addi 3, 3, 1
 ; ALL-NEXT:    blr
   %t0 = xor i8 %x, -1
   %t1 = sub i8 %y, %t0
@@ -22,8 +22,8 @@ define i8 @scalar_i8(i8 %x, i8 %y) nounw
 define i16 @scalar_i16(i16 %x, i16 %y) nounwind {
 ; ALL-LABEL: scalar_i16:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    nor 3, 3, 3
-; ALL-NEXT:    subf 3, 3, 4
+; ALL-NEXT:    add 3, 4, 3
+; ALL-NEXT:    addi 3, 3, 1
 ; ALL-NEXT:    blr
   %t0 = xor i16 %x, -1
   %t1 = sub i16 %y, %t0
@@ -33,8 +33,8 @@ define i16 @scalar_i16(i16 %x, i16 %y) n
 define i32 @scalar_i32(i32 %x, i32 %y) nounwind {
 ; ALL-LABEL: scalar_i32:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    nor 3, 3, 3
-; ALL-NEXT:    subf 3, 3, 4
+; ALL-NEXT:    add 3, 4, 3
+; ALL-NEXT:    addi 3, 3, 1
 ; ALL-NEXT:    blr
   %t0 = xor i32 %x, -1
   %t1 = sub i32 %y, %t0
@@ -44,16 +44,16 @@ define i32 @scalar_i32(i32 %x, i32 %y) n
 define i64 @scalar_i64(i64 %x, i64 %y) nounwind {
 ; PPC32-LABEL: scalar_i64:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    nor 4, 4, 4
-; PPC32-NEXT:    nor 3, 3, 3
-; PPC32-NEXT:    subfc 4, 4, 6
-; PPC32-NEXT:    subfe 3, 3, 5
+; PPC32-NEXT:    addc 4, 6, 4
+; PPC32-NEXT:    adde 3, 5, 3
+; PPC32-NEXT:    addic 4, 4, 1
+; PPC32-NEXT:    addze 3, 3
 ; PPC32-NEXT:    blr
 ;
 ; PPC64-LABEL: scalar_i64:
 ; PPC64:       # %bb.0:
-; PPC64-NEXT:    not 3, 3
-; PPC64-NEXT:    sub 3, 4, 3
+; PPC64-NEXT:    add 3, 4, 3
+; PPC64-NEXT:    addi 3, 3, 1
 ; PPC64-NEXT:    blr
   %t0 = xor i64 %x, -1
   %t1 = sub i64 %y, %t0
@@ -63,183 +63,191 @@ define i64 @scalar_i64(i64 %x, i64 %y) n
 define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; PPC32-LABEL: vector_i128_i8:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    stwu 1, -48(1)
-; PPC32-NEXT:    lbz 4, 99(1)
-; PPC32-NEXT:    stw 23, 12(1) # 4-byte Folded Spill
-; PPC32-NEXT:    nor 5, 5, 5
-; PPC32-NEXT:    lbz 23, 103(1)
-; PPC32-NEXT:    subf 4, 5, 4
-; PPC32-NEXT:    lbz 5, 107(1)
-; PPC32-NEXT:    nor 6, 6, 6
-; PPC32-NEXT:    subf 6, 6, 23
-; PPC32-NEXT:    lbz 23, 111(1)
-; PPC32-NEXT:    nor 7, 7, 7
-; PPC32-NEXT:    subf 5, 7, 5
-; PPC32-NEXT:    lbz 7, 115(1)
-; PPC32-NEXT:    nor 8, 8, 8
-; PPC32-NEXT:    stw 24, 16(1) # 4-byte Folded Spill
-; PPC32-NEXT:    subf 8, 8, 23
-; PPC32-NEXT:    lbz 24, 119(1)
-; PPC32-NEXT:    lbz 23, 59(1)
-; PPC32-NEXT:    nor 9, 9, 9
-; PPC32-NEXT:    stw 25, 20(1) # 4-byte Folded Spill
-; PPC32-NEXT:    subf 7, 9, 7
-; PPC32-NEXT:    lbz 25, 123(1)
-; PPC32-NEXT:    lbz 9, 63(1)
-; PPC32-NEXT:    stw 26, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT:    nor 10, 10, 10
-; PPC32-NEXT:    lbz 26, 127(1)
-; PPC32-NEXT:    subf 10, 10, 24
-; PPC32-NEXT:    lbz 24, 67(1)
-; PPC32-NEXT:    nor 23, 23, 23
-; PPC32-NEXT:    stw 27, 28(1) # 4-byte Folded Spill
-; PPC32-NEXT:    subf 25, 23, 25
-; PPC32-NEXT:    lbz 27, 131(1)
-; PPC32-NEXT:    lbz 23, 71(1)
-; PPC32-NEXT:    nor 9, 9, 9
-; PPC32-NEXT:    stw 28, 32(1) # 4-byte Folded Spill
-; PPC32-NEXT:    subf 9, 9, 26
-; PPC32-NEXT:    lbz 28, 135(1)
-; PPC32-NEXT:    lbz 26, 75(1)
-; PPC32-NEXT:    stw 29, 36(1) # 4-byte Folded Spill
-; PPC32-NEXT:    nor 24, 24, 24
-; PPC32-NEXT:    lbz 29, 139(1)
-; PPC32-NEXT:    subf 27, 24, 27
-; PPC32-NEXT:    lbz 24, 79(1)
-; PPC32-NEXT:    nor 23, 23, 23
-; PPC32-NEXT:    stw 30, 40(1) # 4-byte Folded Spill
-; PPC32-NEXT:    subf 28, 23, 28
-; PPC32-NEXT:    lbz 30, 143(1)
-; PPC32-NEXT:    lbz 23, 83(1)
-; PPC32-NEXT:    nor 26, 26, 26
-; PPC32-NEXT:    lbz 0, 147(1)
-; PPC32-NEXT:    subf 29, 26, 29
-; PPC32-NEXT:    lbz 26, 87(1)
-; PPC32-NEXT:    lbz 12, 151(1)
-; PPC32-NEXT:    nor 24, 24, 24
-; PPC32-NEXT:    subf 30, 24, 30
-; PPC32-NEXT:    lbz 24, 91(1)
-; PPC32-NEXT:    nor 23, 23, 23
-; PPC32-NEXT:    lbz 11, 155(1)
-; PPC32-NEXT:    subf 0, 23, 0
-; PPC32-NEXT:    lbz 23, 95(1)
-; PPC32-NEXT:    nor 26, 26, 26
-; PPC32-NEXT:    subf 12, 26, 12
-; PPC32-NEXT:    lbz 26, 159(1)
-; PPC32-NEXT:    nor 24, 24, 24
-; PPC32-NEXT:    subf 11, 24, 11
-; PPC32-NEXT:    nor 24, 23, 23
-; PPC32-NEXT:    subf 26, 24, 26
-; PPC32-NEXT:    stb 10, 5(3)
-; PPC32-NEXT:    stb 7, 4(3)
-; PPC32-NEXT:    stb 8, 3(3)
-; PPC32-NEXT:    stb 5, 2(3)
-; PPC32-NEXT:    stb 6, 1(3)
-; PPC32-NEXT:    stb 26, 15(3)
-; PPC32-NEXT:    stb 11, 14(3)
+; PPC32-NEXT:    stwu 1, -64(1)
+; PPC32-NEXT:    stw 21, 20(1) # 4-byte Folded Spill
+; PPC32-NEXT:    lbz 4, 119(1)
+; PPC32-NEXT:    lbz 11, 115(1)
+; PPC32-NEXT:    stw 24, 32(1) # 4-byte Folded Spill
+; PPC32-NEXT:    stw 23, 28(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 4, 4, 6
+; PPC32-NEXT:    lbz 21, 123(1)
+; PPC32-NEXT:    lbz 6, 131(1)
+; PPC32-NEXT:    add 5, 11, 5
+; PPC32-NEXT:    lbz 11, 127(1)
+; PPC32-NEXT:    add 7, 21, 7
+; PPC32-NEXT:    lbz 21, 135(1)
+; PPC32-NEXT:    lbz 24, 83(1)
+; PPC32-NEXT:    lbz 23, 79(1)
+; PPC32-NEXT:    add 6, 6, 9
+; PPC32-NEXT:    add 10, 21, 10
+; PPC32-NEXT:    lbz 21, 147(1)
+; PPC32-NEXT:    lbz 9, 143(1)
+; PPC32-NEXT:    stw 22, 24(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 8, 11, 8
+; PPC32-NEXT:    lbz 22, 75(1)
+; PPC32-NEXT:    lbz 11, 139(1)
+; PPC32-NEXT:    stw 27, 44(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 24, 21, 24
+; PPC32-NEXT:    lbz 27, 95(1)
+; PPC32-NEXT:    lbz 21, 159(1)
+; PPC32-NEXT:    stw 26, 40(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 9, 9, 23
+; PPC32-NEXT:    lbz 26, 91(1)
+; PPC32-NEXT:    lbz 23, 155(1)
+; PPC32-NEXT:    stw 25, 36(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 11, 11, 22
+; PPC32-NEXT:    lbz 25, 87(1)
+; PPC32-NEXT:    lbz 22, 151(1)
+; PPC32-NEXT:    lbz 12, 111(1)
+; PPC32-NEXT:    add 27, 21, 27
+; PPC32-NEXT:    lbz 21, 175(1)
+; PPC32-NEXT:    stw 30, 56(1) # 4-byte Folded Spill
+; PPC32-NEXT:    lbz 0, 107(1)
+; PPC32-NEXT:    stw 29, 52(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 26, 23, 26
+; PPC32-NEXT:    lbz 30, 171(1)
+; PPC32-NEXT:    lbz 29, 103(1)
+; PPC32-NEXT:    lbz 23, 167(1)
+; PPC32-NEXT:    stw 28, 48(1) # 4-byte Folded Spill
+; PPC32-NEXT:    add 25, 22, 25
+; PPC32-NEXT:    lbz 28, 99(1)
+; PPC32-NEXT:    lbz 22, 163(1)
+; PPC32-NEXT:    add 12, 21, 12
+; PPC32-NEXT:    add 30, 30, 0
+; PPC32-NEXT:    addi 12, 12, 1
+; PPC32-NEXT:    add 29, 23, 29
+; PPC32-NEXT:    stb 12, 15(3)
+; PPC32-NEXT:    addi 12, 30, 1
+; PPC32-NEXT:    add 28, 22, 28
+; PPC32-NEXT:    stb 12, 14(3)
+; PPC32-NEXT:    addi 12, 29, 1
 ; PPC32-NEXT:    stb 12, 13(3)
-; PPC32-NEXT:    stb 0, 12(3)
-; PPC32-NEXT:    stb 30, 11(3)
-; PPC32-NEXT:    stb 29, 10(3)
-; PPC32-NEXT:    stb 28, 9(3)
-; PPC32-NEXT:    stb 27, 8(3)
+; PPC32-NEXT:    addi 12, 28, 1
+; PPC32-NEXT:    stb 12, 12(3)
+; PPC32-NEXT:    addi 12, 27, 1
+; PPC32-NEXT:    stb 12, 11(3)
+; PPC32-NEXT:    addi 12, 26, 1
+; PPC32-NEXT:    addi 9, 9, 1
+; PPC32-NEXT:    addi 6, 6, 1
+; PPC32-NEXT:    stb 12, 10(3)
+; PPC32-NEXT:    addi 12, 25, 1
 ; PPC32-NEXT:    stb 9, 7(3)
-; PPC32-NEXT:    stb 25, 6(3)
+; PPC32-NEXT:    addi 9, 11, 1
+; PPC32-NEXT:    stb 6, 4(3)
+; PPC32-NEXT:    addi 6, 8, 1
+; PPC32-NEXT:    addi 4, 4, 1
+; PPC32-NEXT:    stb 12, 9(3)
+; PPC32-NEXT:    addi 12, 24, 1
+; PPC32-NEXT:    stb 9, 6(3)
+; PPC32-NEXT:    addi 9, 10, 1
+; PPC32-NEXT:    stb 6, 3(3)
+; PPC32-NEXT:    addi 6, 7, 1
+; PPC32-NEXT:    stb 4, 1(3)
+; PPC32-NEXT:    addi 4, 5, 1
+; PPC32-NEXT:    stb 12, 8(3)
+; PPC32-NEXT:    stb 9, 5(3)
+; PPC32-NEXT:    stb 6, 2(3)
 ; PPC32-NEXT:    stb 4, 0(3)
-; PPC32-NEXT:    lwz 30, 40(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 29, 36(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 28, 32(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 27, 28(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 26, 24(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 25, 20(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 24, 16(1) # 4-byte Folded Reload
-; PPC32-NEXT:    lwz 23, 12(1) # 4-byte Folded Reload
-; PPC32-NEXT:    addi 1, 1, 48
+; PPC32-NEXT:    lwz 30, 56(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 29, 52(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 28, 48(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 27, 44(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 26, 40(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 25, 36(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 24, 32(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 23, 28(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 22, 24(1) # 4-byte Folded Reload
+; PPC32-NEXT:    lwz 21, 20(1) # 4-byte Folded Reload
+; PPC32-NEXT:    addi 1, 1, 64
 ; PPC32-NEXT:    blr
 ;
 ; PPC64BE-LABEL: vector_i128_i8:
 ; PPC64BE:       # %bb.0:
-; PPC64BE-NEXT:    lbz 11, 191(1)
-; PPC64BE-NEXT:    nor 4, 4, 4
+; PPC64BE-NEXT:    std 21, -88(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    lbz 21, 207(1)
+; PPC64BE-NEXT:    lbz 11, 199(1)
+; PPC64BE-NEXT:    lbz 12, 191(1)
 ; PPC64BE-NEXT:    std 23, -72(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lbz 23, 199(1)
-; PPC64BE-NEXT:    nor 5, 5, 5
-; PPC64BE-NEXT:    subf 4, 4, 11
-; PPC64BE-NEXT:    lbz 11, 207(1)
-; PPC64BE-NEXT:    nor 6, 6, 6
-; PPC64BE-NEXT:    subf 5, 5, 23
-; PPC64BE-NEXT:    lbz 23, 215(1)
-; PPC64BE-NEXT:    subf 6, 6, 11
-; PPC64BE-NEXT:    lbz 11, 223(1)
-; PPC64BE-NEXT:    nor 7, 7, 7
-; PPC64BE-NEXT:    nor 8, 8, 8
-; PPC64BE-NEXT:    std 24, -64(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lbz 24, 239(1)
-; PPC64BE-NEXT:    subf 7, 7, 23
-; PPC64BE-NEXT:    lbz 23, 231(1)
-; PPC64BE-NEXT:    subf 8, 8, 11
-; PPC64BE-NEXT:    lbz 11, 119(1)
+; PPC64BE-NEXT:    std 22, -80(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 2, -96(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 25, -56(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    nor 9, 9, 9
-; PPC64BE-NEXT:    lbz 25, 247(1)
-; PPC64BE-NEXT:    nor 10, 10, 10
-; PPC64BE-NEXT:    subf 9, 9, 23
+; PPC64BE-NEXT:    std 24, -64(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    lbz 0, 183(1)
+; PPC64BE-NEXT:    add 6, 21, 6
+; PPC64BE-NEXT:    lbz 21, 231(1)
+; PPC64BE-NEXT:    add 5, 11, 5
+; PPC64BE-NEXT:    lbz 11, 223(1)
+; PPC64BE-NEXT:    add 4, 12, 4
+; PPC64BE-NEXT:    lbz 12, 215(1)
 ; PPC64BE-NEXT:    lbz 23, 127(1)
-; PPC64BE-NEXT:    subf 10, 10, 24
-; PPC64BE-NEXT:    lbz 24, 135(1)
-; PPC64BE-NEXT:    nor 11, 11, 11
-; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lbz 27, 263(1)
-; PPC64BE-NEXT:    lbz 26, 255(1)
-; PPC64BE-NEXT:    subf 11, 11, 25
+; PPC64BE-NEXT:    add 9, 21, 9
+; PPC64BE-NEXT:    lbz 21, 255(1)
+; PPC64BE-NEXT:    lbz 22, 119(1)
+; PPC64BE-NEXT:    add 8, 11, 8
+; PPC64BE-NEXT:    lbz 11, 247(1)
+; PPC64BE-NEXT:    add 7, 12, 7
+; PPC64BE-NEXT:    lbz 12, 239(1)
+; PPC64BE-NEXT:    lbz 26, 151(1)
+; PPC64BE-NEXT:    add 2, 21, 23
+; PPC64BE-NEXT:    lbz 23, 279(1)
 ; PPC64BE-NEXT:    lbz 25, 143(1)
+; PPC64BE-NEXT:    add 11, 11, 22
+; PPC64BE-NEXT:    lbz 22, 271(1)
+; PPC64BE-NEXT:    lbz 24, 135(1)
+; PPC64BE-NEXT:    add 10, 12, 10
+; PPC64BE-NEXT:    lbz 12, 263(1)
+; PPC64BE-NEXT:    lbz 30, 175(1)
+; PPC64BE-NEXT:    lbz 29, 303(1)
+; PPC64BE-NEXT:    add 26, 23, 26
+; PPC64BE-NEXT:    lbz 23, 311(1)
 ; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    nor 23, 23, 23
-; PPC64BE-NEXT:    lbz 28, 271(1)
-; PPC64BE-NEXT:    nor 24, 24, 24
-; PPC64BE-NEXT:    subf 26, 23, 26
-; PPC64BE-NEXT:    lbz 23, 151(1)
-; PPC64BE-NEXT:    subf 27, 24, 27
-; PPC64BE-NEXT:    lbz 24, 159(1)
-; PPC64BE-NEXT:    nor 25, 25, 25
-; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lbz 30, 287(1)
-; PPC64BE-NEXT:    lbz 29, 279(1)
-; PPC64BE-NEXT:    subf 28, 25, 28
-; PPC64BE-NEXT:    lbz 25, 167(1)
-; PPC64BE-NEXT:    lbz 0, 295(1)
-; PPC64BE-NEXT:    nor 23, 23, 23
-; PPC64BE-NEXT:    nor 24, 24, 24
-; PPC64BE-NEXT:    subf 29, 23, 29
-; PPC64BE-NEXT:    lbz 23, 175(1)
-; PPC64BE-NEXT:    subf 30, 24, 30
-; PPC64BE-NEXT:    lbz 24, 183(1)
-; PPC64BE-NEXT:    nor 25, 25, 25
-; PPC64BE-NEXT:    lbz 12, 303(1)
-; PPC64BE-NEXT:    subf 0, 25, 0
-; PPC64BE-NEXT:    lbz 25, 311(1)
-; PPC64BE-NEXT:    nor 23, 23, 23
-; PPC64BE-NEXT:    nor 24, 24, 24
-; PPC64BE-NEXT:    subf 12, 23, 12
-; PPC64BE-NEXT:    subf 25, 24, 25
+; PPC64BE-NEXT:    add 25, 22, 25
+; PPC64BE-NEXT:    lbz 28, 167(1)
+; PPC64BE-NEXT:    lbz 22, 295(1)
+; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
+; PPC64BE-NEXT:    add 12, 12, 24
+; PPC64BE-NEXT:    lbz 27, 159(1)
+; PPC64BE-NEXT:    lbz 24, 287(1)
+; PPC64BE-NEXT:    add 30, 29, 30
+; PPC64BE-NEXT:    add 29, 23, 0
+; PPC64BE-NEXT:    addi 0, 29, 1
+; PPC64BE-NEXT:    add 28, 22, 28
+; PPC64BE-NEXT:    stb 0, 15(3)
+; PPC64BE-NEXT:    addi 0, 30, 1
+; PPC64BE-NEXT:    add 27, 24, 27
+; PPC64BE-NEXT:    stb 0, 14(3)
+; PPC64BE-NEXT:    addi 0, 28, 1
+; PPC64BE-NEXT:    stb 0, 13(3)
+; PPC64BE-NEXT:    addi 0, 27, 1
+; PPC64BE-NEXT:    stb 0, 12(3)
+; PPC64BE-NEXT:    addi 0, 26, 1
+; PPC64BE-NEXT:    addi 12, 12, 1
+; PPC64BE-NEXT:    stb 0, 11(3)
+; PPC64BE-NEXT:    addi 0, 25, 1
+; PPC64BE-NEXT:    stb 12, 9(3)
+; PPC64BE-NEXT:    addi 12, 2, 1
+; PPC64BE-NEXT:    addi 11, 11, 1
+; PPC64BE-NEXT:    addi 10, 10, 1
+; PPC64BE-NEXT:    addi 9, 9, 1
+; PPC64BE-NEXT:    addi 8, 8, 1
+; PPC64BE-NEXT:    addi 7, 7, 1
+; PPC64BE-NEXT:    addi 6, 6, 1
+; PPC64BE-NEXT:    addi 5, 5, 1
+; PPC64BE-NEXT:    addi 4, 4, 1
+; PPC64BE-NEXT:    stb 0, 10(3)
+; PPC64BE-NEXT:    stb 12, 8(3)
+; PPC64BE-NEXT:    stb 11, 7(3)
 ; PPC64BE-NEXT:    stb 10, 6(3)
 ; PPC64BE-NEXT:    stb 9, 5(3)
 ; PPC64BE-NEXT:    stb 8, 4(3)
 ; PPC64BE-NEXT:    stb 7, 3(3)
 ; PPC64BE-NEXT:    stb 6, 2(3)
 ; PPC64BE-NEXT:    stb 5, 1(3)
-; PPC64BE-NEXT:    stb 25, 15(3)
-; PPC64BE-NEXT:    stb 12, 14(3)
-; PPC64BE-NEXT:    stb 0, 13(3)
-; PPC64BE-NEXT:    stb 30, 12(3)
-; PPC64BE-NEXT:    stb 29, 11(3)
-; PPC64BE-NEXT:    stb 28, 10(3)
-; PPC64BE-NEXT:    stb 27, 9(3)
-; PPC64BE-NEXT:    stb 26, 8(3)
-; PPC64BE-NEXT:    stb 11, 7(3)
 ; PPC64BE-NEXT:    stb 4, 0(3)
+; PPC64BE-NEXT:    ld 2, -96(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 28, -32(1) # 8-byte Folded Reload
@@ -248,6 +256,8 @@ define <16 x i8> @vector_i128_i8(<16 x i
 ; PPC64BE-NEXT:    ld 25, -56(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 24, -64(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 23, -72(1) # 8-byte Folded Reload
+; PPC64BE-NEXT:    ld 22, -80(1) # 8-byte Folded Reload
+; PPC64BE-NEXT:    ld 21, -88(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    blr
 ;
 ; PPC64LE-LABEL: vector_i128_i8:
@@ -269,30 +279,30 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ; PPC32-NEXT:    stw 28, 16(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 29, 20(1) # 4-byte Folded Spill
 ; PPC32-NEXT:    stw 30, 24(1) # 4-byte Folded Spill
-; PPC32-NEXT:    lhz 11, 70(1)
-; PPC32-NEXT:    lhz 12, 66(1)
-; PPC32-NEXT:    lhz 0, 62(1)
-; PPC32-NEXT:    nor 10, 10, 10
-; PPC32-NEXT:    lhz 30, 58(1)
-; PPC32-NEXT:    lhz 29, 54(1)
-; PPC32-NEXT:    lhz 28, 50(1)
-; PPC32-NEXT:    lhz 27, 46(1)
-; PPC32-NEXT:    lhz 26, 42(1)
-; PPC32-NEXT:    nor 9, 9, 9
-; PPC32-NEXT:    nor 8, 8, 8
-; PPC32-NEXT:    nor 7, 7, 7
-; PPC32-NEXT:    nor 6, 6, 6
-; PPC32-NEXT:    nor 5, 5, 5
-; PPC32-NEXT:    nor 4, 4, 4
-; PPC32-NEXT:    nor 3, 3, 3
-; PPC32-NEXT:    subf 3, 3, 26
-; PPC32-NEXT:    subf 4, 4, 27
-; PPC32-NEXT:    subf 5, 5, 28
-; PPC32-NEXT:    subf 6, 6, 29
-; PPC32-NEXT:    subf 7, 7, 30
-; PPC32-NEXT:    subf 8, 8, 0
-; PPC32-NEXT:    subf 9, 9, 12
-; PPC32-NEXT:    subf 10, 10, 11
+; PPC32-NEXT:    lhz 11, 50(1)
+; PPC32-NEXT:    lhz 12, 46(1)
+; PPC32-NEXT:    lhz 0, 42(1)
+; PPC32-NEXT:    lhz 30, 70(1)
+; PPC32-NEXT:    lhz 29, 66(1)
+; PPC32-NEXT:    lhz 28, 62(1)
+; PPC32-NEXT:    lhz 27, 58(1)
+; PPC32-NEXT:    lhz 26, 54(1)
+; PPC32-NEXT:    add 3, 0, 3
+; PPC32-NEXT:    add 4, 12, 4
+; PPC32-NEXT:    add 5, 11, 5
+; PPC32-NEXT:    add 6, 26, 6
+; PPC32-NEXT:    add 7, 27, 7
+; PPC32-NEXT:    add 8, 28, 8
+; PPC32-NEXT:    add 9, 29, 9
+; PPC32-NEXT:    add 10, 30, 10
+; PPC32-NEXT:    addi 3, 3, 1
+; PPC32-NEXT:    addi 4, 4, 1
+; PPC32-NEXT:    addi 5, 5, 1
+; PPC32-NEXT:    addi 6, 6, 1
+; PPC32-NEXT:    addi 7, 7, 1
+; PPC32-NEXT:    addi 8, 8, 1
+; PPC32-NEXT:    addi 9, 9, 1
+; PPC32-NEXT:    addi 10, 10, 1
 ; PPC32-NEXT:    lwz 30, 24(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 29, 20(1) # 4-byte Folded Reload
 ; PPC32-NEXT:    lwz 28, 16(1) # 4-byte Folded Reload
@@ -304,43 +314,43 @@ define <8 x i16> @vector_i128_i16(<8 x i
 ; PPC64BE-LABEL: vector_i128_i16:
 ; PPC64BE:       # %bb.0:
 ; PPC64BE-NEXT:    std 25, -56(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lhz 25, 118(1)
 ; PPC64BE-NEXT:    std 26, -48(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 27, -40(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 28, -32(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 29, -24(1) # 8-byte Folded Spill
 ; PPC64BE-NEXT:    std 30, -16(1) # 8-byte Folded Spill
-; PPC64BE-NEXT:    lhz 11, 182(1)
-; PPC64BE-NEXT:    lhz 12, 174(1)
-; PPC64BE-NEXT:    lhz 0, 166(1)
-; PPC64BE-NEXT:    nor 10, 10, 10
-; PPC64BE-NEXT:    lhz 30, 158(1)
-; PPC64BE-NEXT:    lhz 29, 150(1)
-; PPC64BE-NEXT:    lhz 28, 142(1)
-; PPC64BE-NEXT:    lhz 27, 134(1)
-; PPC64BE-NEXT:    lhz 26, 126(1)
-; PPC64BE-NEXT:    nor 9, 9, 9
-; PPC64BE-NEXT:    nor 8, 8, 8
-; PPC64BE-NEXT:    nor 7, 7, 7
-; PPC64BE-NEXT:    nor 6, 6, 6
-; PPC64BE-NEXT:    nor 5, 5, 5
-; PPC64BE-NEXT:    nor 4, 4, 4
-; PPC64BE-NEXT:    nor 25, 25, 25
-; PPC64BE-NEXT:    subf 4, 4, 26
-; PPC64BE-NEXT:    subf 5, 5, 27
-; PPC64BE-NEXT:    subf 6, 6, 28
-; PPC64BE-NEXT:    subf 7, 7, 29
-; PPC64BE-NEXT:    subf 8, 8, 30
-; PPC64BE-NEXT:    subf 9, 9, 0
-; PPC64BE-NEXT:    subf 10, 10, 12
-; PPC64BE-NEXT:    subf 11, 25, 11
+; PPC64BE-NEXT:    lhz 11, 142(1)
+; PPC64BE-NEXT:    lhz 12, 134(1)
+; PPC64BE-NEXT:    lhz 0, 126(1)
+; PPC64BE-NEXT:    lhz 30, 118(1)
+; PPC64BE-NEXT:    lhz 29, 182(1)
+; PPC64BE-NEXT:    lhz 28, 174(1)
+; PPC64BE-NEXT:    lhz 27, 166(1)
+; PPC64BE-NEXT:    lhz 26, 158(1)
+; PPC64BE-NEXT:    lhz 25, 150(1)
+; PPC64BE-NEXT:    add 4, 0, 4
+; PPC64BE-NEXT:    add 5, 12, 5
+; PPC64BE-NEXT:    add 6, 11, 6
+; PPC64BE-NEXT:    add 7, 25, 7
+; PPC64BE-NEXT:    add 8, 26, 8
+; PPC64BE-NEXT:    add 9, 27, 9
+; PPC64BE-NEXT:    add 10, 28, 10
+; PPC64BE-NEXT:    add 11, 29, 30
+; PPC64BE-NEXT:    addi 4, 4, 1
+; PPC64BE-NEXT:    addi 5, 5, 1
+; PPC64BE-NEXT:    addi 6, 6, 1
+; PPC64BE-NEXT:    addi 7, 7, 1
+; PPC64BE-NEXT:    addi 8, 8, 1
+; PPC64BE-NEXT:    addi 9, 9, 1
+; PPC64BE-NEXT:    addi 10, 10, 1
+; PPC64BE-NEXT:    addi 11, 11, 1
+; PPC64BE-NEXT:    sth 11, 14(3)
 ; PPC64BE-NEXT:    sth 10, 12(3)
 ; PPC64BE-NEXT:    sth 9, 10(3)
 ; PPC64BE-NEXT:    sth 8, 8(3)
 ; PPC64BE-NEXT:    sth 7, 6(3)
 ; PPC64BE-NEXT:    sth 6, 4(3)
 ; PPC64BE-NEXT:    sth 5, 2(3)
-; PPC64BE-NEXT:    sth 11, 14(3)
 ; PPC64BE-NEXT:    sth 4, 0(3)
 ; PPC64BE-NEXT:    ld 30, -16(1) # 8-byte Folded Reload
 ; PPC64BE-NEXT:    ld 29, -24(1) # 8-byte Folded Reload
@@ -363,26 +373,26 @@ define <8 x i16> @vector_i128_i16(<8 x i
 define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; PPC32-LABEL: vector_i128_i32:
 ; PPC32:       # %bb.0:
-; PPC32-NEXT:    nor 6, 6, 6
-; PPC32-NEXT:    nor 5, 5, 5
-; PPC32-NEXT:    nor 4, 4, 4
-; PPC32-NEXT:    nor 3, 3, 3
-; PPC32-NEXT:    subf 3, 3, 7
-; PPC32-NEXT:    subf 4, 4, 8
-; PPC32-NEXT:    subf 5, 5, 9
-; PPC32-NEXT:    subf 6, 6, 10
+; PPC32-NEXT:    add 3, 7, 3
+; PPC32-NEXT:    add 4, 8, 4
+; PPC32-NEXT:    add 5, 9, 5
+; PPC32-NEXT:    add 6, 10, 6
+; PPC32-NEXT:    addi 3, 3, 1
+; PPC32-NEXT:    addi 4, 4, 1
+; PPC32-NEXT:    addi 5, 5, 1
+; PPC32-NEXT:    addi 6, 6, 1
 ; PPC32-NEXT:    blr
 ;
 ; PPC64BE-LABEL: vector_i128_i32:
 ; PPC64BE:       # %bb.0:
-; PPC64BE-NEXT:    nor 3, 3, 3
-; PPC64BE-NEXT:    nor 4, 4, 4
-; PPC64BE-NEXT:    nor 5, 5, 5
-; PPC64BE-NEXT:    nor 6, 6, 6
-; PPC64BE-NEXT:    subf 6, 6, 10
-; PPC64BE-NEXT:    subf 5, 5, 9
-; PPC64BE-NEXT:    subf 4, 4, 8
-; PPC64BE-NEXT:    subf 3, 3, 7
+; PPC64BE-NEXT:    add 6, 10, 6
+; PPC64BE-NEXT:    add 5, 9, 5
+; PPC64BE-NEXT:    add 4, 8, 4
+; PPC64BE-NEXT:    add 3, 7, 3
+; PPC64BE-NEXT:    addi 6, 6, 1
+; PPC64BE-NEXT:    addi 5, 5, 1
+; PPC64BE-NEXT:    addi 4, 4, 1
+; PPC64BE-NEXT:    addi 3, 3, 1
 ; PPC64BE-NEXT:    blr
 ;
 ; PPC64LE-LABEL: vector_i128_i32:
@@ -410,10 +420,10 @@ define <2 x i64> @vector_i128_i64(<2 x i
 ;
 ; PPC64BE-LABEL: vector_i128_i64:
 ; PPC64BE:       # %bb.0:
-; PPC64BE-NEXT:    not 4, 4
-; PPC64BE-NEXT:    not 3, 3
-; PPC64BE-NEXT:    sub 3, 5, 3
-; PPC64BE-NEXT:    sub 4, 6, 4
+; PPC64BE-NEXT:    add 3, 5, 3
+; PPC64BE-NEXT:    add 4, 6, 4
+; PPC64BE-NEXT:    addi 3, 3, 1
+; PPC64BE-NEXT:    addi 4, 4, 1
 ; PPC64BE-NEXT:    blr
 ;
 ; PPC64LE-LABEL: vector_i128_i64:

Modified: llvm/trunk/test/CodeGen/X86/sub-of-not.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sub-of-not.ll?rev=365010&r1=365009&r2=365010&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sub-of-not.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sub-of-not.ll Wed Jul  3 02:41:35 2019
@@ -13,16 +13,16 @@ define i8 @scalar_i8(i8 %x, i8 %y) nounw
 ; X86-LABEL: scalar_i8:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    notb %cl
-; X86-NEXT:    subb %cl, %al
+; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
+; X86-NEXT:    incb %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notb %dil
-; X64-NEXT:    subb %dil, %al
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal (%rsi,%rdi), %eax
+; X64-NEXT:    incb %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %t0 = xor i8 %x, -1
@@ -33,18 +33,17 @@ define i8 @scalar_i8(i8 %x, i8 %y) nounw
 define i16 @scalar_i16(i16 %x, i16 %y) nounwind {
 ; X86-LABEL: scalar_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    notl %ecx
-; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addw {{[0-9]+}}(%esp), %ax
+; X86-NEXT:    incl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal 1(%rsi,%rdi), %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %t0 = xor i16 %x, -1
@@ -53,19 +52,26 @@ define i16 @scalar_i16(i16 %x, i16 %y) n
 }
 
 define i32 @scalar_i32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: scalar_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    notl %ecx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    retl
+; X86-SLOWLEA-LABEL: scalar_i32:
+; X86-SLOWLEA:       # %bb.0:
+; X86-SLOWLEA-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-SLOWLEA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOWLEA-NEXT:    addl %ecx, %eax
+; X86-SLOWLEA-NEXT:    addl $1, %eax
+; X86-SLOWLEA-NEXT:    retl
+;
+; X86-FASTLEA-LABEL: scalar_i32:
+; X86-FASTLEA:       # %bb.0:
+; X86-FASTLEA-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-FASTLEA-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-FASTLEA-NEXT:    leal 1(%ecx,%eax), %eax
+; X86-FASTLEA-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i32:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %edi
-; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NEXT:    # kill: def $edi killed $edi def $rdi
+; X64-NEXT:    leal 1(%rsi,%rdi), %eax
 ; X64-NEXT:    retq
   %t0 = xor i32 %x, -1
   %t1 = sub i32 %y, %t0
@@ -75,23 +81,17 @@ define i32 @scalar_i32(i32 %x, i32 %y) n
 define i64 @scalar_i64(i64 %x, i64 %y) nounwind {
 ; X86-LABEL: scalar_i64:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    notl %esi
-; X86-NEXT:    notl %ecx
-; X86-NEXT:    subl %ecx, %eax
-; X86-NEXT:    sbbl %esi, %edx
-; X86-NEXT:    popl %esi
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    addl $1, %eax
+; X86-NEXT:    adcl $0, %edx
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: scalar_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    notq %rdi
-; X64-NEXT:    subq %rdi, %rax
+; X64-NEXT:    leaq 1(%rsi,%rdi), %rax
 ; X64-NEXT:    retq
   %t0 = xor i64 %x, -1
   %t1 = sub i64 %y, %t0
@@ -101,10 +101,9 @@ define i64 @scalar_i64(i64 %x, i64 %y) n
 define <16 x i8> @vector_i128_i8(<16 x i8> %x, <16 x i8> %y) nounwind {
 ; ALL-LABEL: vector_i128_i8:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    pcmpeqd %xmm2, %xmm2
-; ALL-NEXT:    pxor %xmm0, %xmm2
-; ALL-NEXT:    psubb %xmm2, %xmm1
-; ALL-NEXT:    movdqa %xmm1, %xmm0
+; ALL-NEXT:    paddb %xmm1, %xmm0
+; ALL-NEXT:    pcmpeqd %xmm1, %xmm1
+; ALL-NEXT:    psubb %xmm1, %xmm0
 ; ALL-NEXT:    ret{{[l|q]}}
   %t0 = xor <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %t1 = sub <16 x i8> %y, %t0
@@ -114,10 +113,9 @@ define <16 x i8> @vector_i128_i8(<16 x i
 define <8 x i16> @vector_i128_i16(<8 x i16> %x, <8 x i16> %y) nounwind {
 ; ALL-LABEL: vector_i128_i16:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    pcmpeqd %xmm2, %xmm2
-; ALL-NEXT:    pxor %xmm0, %xmm2
-; ALL-NEXT:    psubw %xmm2, %xmm1
-; ALL-NEXT:    movdqa %xmm1, %xmm0
+; ALL-NEXT:    paddw %xmm1, %xmm0
+; ALL-NEXT:    pcmpeqd %xmm1, %xmm1
+; ALL-NEXT:    psubw %xmm1, %xmm0
 ; ALL-NEXT:    ret{{[l|q]}}
   %t0 = xor <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
   %t1 = sub <8 x i16> %y, %t0
@@ -127,10 +125,9 @@ define <8 x i16> @vector_i128_i16(<8 x i
 define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; ALL-LABEL: vector_i128_i32:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    pcmpeqd %xmm2, %xmm2
-; ALL-NEXT:    pxor %xmm0, %xmm2
-; ALL-NEXT:    psubd %xmm2, %xmm1
-; ALL-NEXT:    movdqa %xmm1, %xmm0
+; ALL-NEXT:    paddd %xmm1, %xmm0
+; ALL-NEXT:    pcmpeqd %xmm1, %xmm1
+; ALL-NEXT:    psubd %xmm1, %xmm0
 ; ALL-NEXT:    ret{{[l|q]}}
   %t0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
   %t1 = sub <4 x i32> %y, %t0
@@ -140,10 +137,9 @@ define <4 x i32> @vector_i128_i32(<4 x i
 define <2 x i64> @vector_i128_i64(<2 x i64> %x, <2 x i64> %y) nounwind {
 ; ALL-LABEL: vector_i128_i64:
 ; ALL:       # %bb.0:
-; ALL-NEXT:    pcmpeqd %xmm2, %xmm2
-; ALL-NEXT:    pxor %xmm0, %xmm2
-; ALL-NEXT:    psubq %xmm2, %xmm1
-; ALL-NEXT:    movdqa %xmm1, %xmm0
+; ALL-NEXT:    paddq %xmm1, %xmm0
+; ALL-NEXT:    pcmpeqd %xmm1, %xmm1
+; ALL-NEXT:    psubq %xmm1, %xmm0
 ; ALL-NEXT:    ret{{[l|q]}}
   %t0 = xor <2 x i64> %x, <i64 -1, i64 -1>
   %t1 = sub <2 x i64> %y, %t0




More information about the llvm-commits mailing list