[llvm] 440c4b7 - [SelectionDAG][RISCV][ARM][PowerPC][X86][WebAssembly] Change default abs expansion to use sra (X, size(X)-1); sub (xor (X, Y), Y).

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 20 21:44:18 PST 2022


Author: Craig Topper
Date: 2022-02-20T21:11:23-08:00
New Revision: 440c4b705ad1d494a183b53cd65f21a481726157

URL: https://github.com/llvm/llvm-project/commit/440c4b705ad1d494a183b53cd65f21a481726157
DIFF: https://github.com/llvm/llvm-project/commit/440c4b705ad1d494a183b53cd65f21a481726157.diff

LOG: [SelectionDAG][RISCV][ARM][PowerPC][X86][WebAssembly] Change default abs expansion to use sra (X, size(X)-1); sub (xor (X, Y), Y).

Previously we used sra (X, size(X)-1); xor (add (X, Y), Y).

By placing the sub at the end, we allow RISCV to combine a subsequent
sign_extend_inreg with it to form subw.
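
For reference, the two expansions correspond to the following
branch-free identities, shown here as a minimal C sketch for i32
(assuming an arithmetic right shift for signed ints, which is what
ISD::SRA guarantees; the function names are illustrative only):

  int abs_old(int x) {
    int y = x >> 31;     /* sign mask: -1 if x < 0, else 0 */
    return (x + y) ^ y;  /* old: xor (add (X, Y), Y) */
  }

  int abs_new(int x) {
    int y = x >> 31;
    return (x ^ y) - y;  /* new: sub (xor (X, Y), Y) */
  }

Both compute abs(x) for every x whose absolute value is representable
(INT_MIN overflows under either form). With the sub last, an i32 result
on RV64 can be sign extended by turning that final sub into subw, as
the rv64zbb test change below shows.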

Some X86 tests for Z - abs(X) seem to have improved as well.

Other targets look to be a wash.

I had to modify ARM's abs matching code to match from the sub instead
of the xor. Maybe ISD::ABS should be made legal instead; I'll try that
in parallel with this patch.

This is an alternative to D119099 which was focused on RISCV only.

Reviewed By: RKSimon

Differential Revision: https://reviews.llvm.org/D119171

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
    llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
    llvm/test/CodeGen/RISCV/rv32zbb.ll
    llvm/test/CodeGen/RISCV/rv64zbb.ll
    llvm/test/CodeGen/Thumb/iabs.ll
    llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll
    llvm/test/CodeGen/Thumb2/abs.ll
    llvm/test/CodeGen/WebAssembly/PR41149.ll
    llvm/test/CodeGen/X86/abs.ll
    llvm/test/CodeGen/X86/combine-abs.ll
    llvm/test/CodeGen/X86/iabs.ll
    llvm/test/CodeGen/X86/neg-abs.ll
    llvm/test/CodeGen/X86/viabs.ll
    llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0b69496d14f9f..6619f1c42a888 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7443,13 +7443,13 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
   SDValue Shift =
       DAG.getNode(ISD::SRA, dl, VT, Op,
                   DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
-  if (!IsNegative) {
-    SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
-    return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
-  }
+  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+
+  // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
+  if (!IsNegative)
+    return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
 
   // 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
-  SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
   return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
 }
 

diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 98c8133282a26..1735c0ddd11a5 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3464,40 +3464,39 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
   return false;
 }
 
-/// Target-specific DAG combining for ISD::XOR.
+/// Target-specific DAG combining for ISD::SUB.
 /// Target-independent combining lowers SELECT_CC nodes of the form
 /// select_cc setg[ge] X,  0,  X, -X
 /// select_cc setgt    X, -1,  X, -X
 /// select_cc setl[te] X,  0, -X,  X
 /// select_cc setlt    X,  1, -X,  X
 /// which represent Integer ABS into:
-/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
 /// ARM instruction selection detects the latter and matches it to
 /// ARM::ABS or ARM::t2ABS machine node.
 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
-  SDValue XORSrc0 = N->getOperand(0);
-  SDValue XORSrc1 = N->getOperand(1);
+  SDValue SUBSrc0 = N->getOperand(0);
+  SDValue SUBSrc1 = N->getOperand(1);
   EVT VT = N->getValueType(0);
 
   if (Subtarget->isThumb1Only())
     return false;
 
-  if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
+  if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
     return false;
 
-  SDValue ADDSrc0 = XORSrc0.getOperand(0);
-  SDValue ADDSrc1 = XORSrc0.getOperand(1);
-  SDValue SRASrc0 = XORSrc1.getOperand(0);
-  SDValue SRASrc1 = XORSrc1.getOperand(1);
+  SDValue XORSrc0 = SUBSrc0.getOperand(0);
+  SDValue XORSrc1 = SUBSrc0.getOperand(1);
+  SDValue SRASrc0 = SUBSrc1.getOperand(0);
+  SDValue SRASrc1 = SUBSrc1.getOperand(1);
   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
   EVT XType = SRASrc0.getValueType();
   unsigned Size = XType.getSizeInBits() - 1;
 
-  if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
-      XType.isInteger() && SRAConstant != nullptr &&
-      Size == SRAConstant->getZExtValue()) {
+  if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
+      SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
-    CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+    CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
     return true;
   }
 
@@ -3673,8 +3672,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
     if (tryInlineAsm(N))
       return;
     break;
-  case ISD::XOR:
-    // Select special operations if XOR node forms integer ABS pattern
+  case ISD::SUB:
+    // Select special operations if SUB node forms integer ABS pattern
     if (tryABSOp(N))
       return;
     // Other cases are autogenerated.

diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 1dc8a7b99bc37..ec5e433b57cf7 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -99,10 +99,10 @@ define <2 x i64> @sub_absv_64(<2 x i64> %a, <2 x i64> %b) local_unnamed_addr {
 ; CHECK-PWR7-NEXT:    sub r4, r5, r6
 ; CHECK-PWR7-NEXT:    sradi r5, r3, 63
 ; CHECK-PWR7-NEXT:    sradi r6, r4, 63
-; CHECK-PWR7-NEXT:    add r3, r3, r5
-; CHECK-PWR7-NEXT:    add r4, r4, r6
 ; CHECK-PWR7-NEXT:    xor r3, r3, r5
 ; CHECK-PWR7-NEXT:    xor r4, r4, r6
+; CHECK-PWR7-NEXT:    sub r3, r3, r5
+; CHECK-PWR7-NEXT:    sub r4, r4, r6
 ; CHECK-PWR7-NEXT:    std r3, -8(r1)
 ; CHECK-PWR7-NEXT:    addi r3, r1, -16
 ; CHECK-PWR7-NEXT:    std r4, -16(r1)
@@ -307,13 +307,13 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    sub r4, r7, r4
 ; CHECK-PWR9-LE-NEXT:    srawi r6, r3, 31
 ; CHECK-PWR9-LE-NEXT:    srawi r7, r4, 31
-; CHECK-PWR9-LE-NEXT:    add r3, r3, r6
-; CHECK-PWR9-LE-NEXT:    add r4, r4, r7
-; CHECK-PWR9-LE-NEXT:    xor r6, r3, r6
-; CHECK-PWR9-LE-NEXT:    srawi r3, r5, 31
+; CHECK-PWR9-LE-NEXT:    xor r3, r3, r6
 ; CHECK-PWR9-LE-NEXT:    xor r4, r4, r7
-; CHECK-PWR9-LE-NEXT:    add r5, r5, r3
-; CHECK-PWR9-LE-NEXT:    xor r3, r5, r3
+; CHECK-PWR9-LE-NEXT:    sub r6, r3, r6
+; CHECK-PWR9-LE-NEXT:    srawi r3, r5, 31
+; CHECK-PWR9-LE-NEXT:    sub r4, r4, r7
+; CHECK-PWR9-LE-NEXT:    xor r5, r5, r3
+; CHECK-PWR9-LE-NEXT:    sub r3, r5, r3
 ; CHECK-PWR9-LE-NEXT:    li r5, 3
 ; CHECK-PWR9-LE-NEXT:    vextubrx r7, r5, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r5, r5, v3
@@ -321,8 +321,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r5, r5, 24
 ; CHECK-PWR9-LE-NEXT:    sub r5, r7, r5
 ; CHECK-PWR9-LE-NEXT:    srawi r7, r5, 31
-; CHECK-PWR9-LE-NEXT:    add r5, r5, r7
 ; CHECK-PWR9-LE-NEXT:    xor r5, r5, r7
+; CHECK-PWR9-LE-NEXT:    sub r5, r5, r7
 ; CHECK-PWR9-LE-NEXT:    li r7, 4
 ; CHECK-PWR9-LE-NEXT:    vextubrx r8, r7, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r7, r7, v3
@@ -331,8 +331,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r7, r7, 24
 ; CHECK-PWR9-LE-NEXT:    sub r7, r8, r7
 ; CHECK-PWR9-LE-NEXT:    srawi r8, r7, 31
-; CHECK-PWR9-LE-NEXT:    add r7, r7, r8
 ; CHECK-PWR9-LE-NEXT:    xor r7, r7, r8
+; CHECK-PWR9-LE-NEXT:    sub r7, r7, r8
 ; CHECK-PWR9-LE-NEXT:    li r8, 5
 ; CHECK-PWR9-LE-NEXT:    vextubrx r9, r8, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r8, r8, v3
@@ -340,8 +340,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r8, r8, 24
 ; CHECK-PWR9-LE-NEXT:    sub r8, r9, r8
 ; CHECK-PWR9-LE-NEXT:    srawi r9, r8, 31
-; CHECK-PWR9-LE-NEXT:    add r8, r8, r9
 ; CHECK-PWR9-LE-NEXT:    xor r8, r8, r9
+; CHECK-PWR9-LE-NEXT:    sub r8, r8, r9
 ; CHECK-PWR9-LE-NEXT:    li r9, 6
 ; CHECK-PWR9-LE-NEXT:    vextubrx r10, r9, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r9, r9, v3
@@ -349,8 +349,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r9, r9, 24
 ; CHECK-PWR9-LE-NEXT:    sub r9, r10, r9
 ; CHECK-PWR9-LE-NEXT:    srawi r10, r9, 31
-; CHECK-PWR9-LE-NEXT:    add r9, r9, r10
 ; CHECK-PWR9-LE-NEXT:    xor r9, r9, r10
+; CHECK-PWR9-LE-NEXT:    sub r9, r9, r10
 ; CHECK-PWR9-LE-NEXT:    li r10, 7
 ; CHECK-PWR9-LE-NEXT:    vextubrx r11, r10, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r10, r10, v3
@@ -358,8 +358,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r10, r10, 24
 ; CHECK-PWR9-LE-NEXT:    sub r10, r11, r10
 ; CHECK-PWR9-LE-NEXT:    srawi r11, r10, 31
-; CHECK-PWR9-LE-NEXT:    add r10, r10, r11
 ; CHECK-PWR9-LE-NEXT:    xor r10, r10, r11
+; CHECK-PWR9-LE-NEXT:    sub r10, r10, r11
 ; CHECK-PWR9-LE-NEXT:    li r11, 8
 ; CHECK-PWR9-LE-NEXT:    vextubrx r12, r11, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r11, r11, v3
@@ -368,8 +368,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r11, r11, 24
 ; CHECK-PWR9-LE-NEXT:    sub r11, r12, r11
 ; CHECK-PWR9-LE-NEXT:    srawi r12, r11, 31
-; CHECK-PWR9-LE-NEXT:    add r11, r11, r12
 ; CHECK-PWR9-LE-NEXT:    xor r11, r11, r12
+; CHECK-PWR9-LE-NEXT:    sub r11, r11, r12
 ; CHECK-PWR9-LE-NEXT:    li r12, 9
 ; CHECK-PWR9-LE-NEXT:    vextubrx r0, r12, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r12, r12, v3
@@ -377,8 +377,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r12, r12, 24
 ; CHECK-PWR9-LE-NEXT:    sub r12, r0, r12
 ; CHECK-PWR9-LE-NEXT:    srawi r0, r12, 31
-; CHECK-PWR9-LE-NEXT:    add r12, r12, r0
 ; CHECK-PWR9-LE-NEXT:    xor r12, r12, r0
+; CHECK-PWR9-LE-NEXT:    sub r12, r12, r0
 ; CHECK-PWR9-LE-NEXT:    li r0, 10
 ; CHECK-PWR9-LE-NEXT:    vextubrx r30, r0, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r0, r0, v3
@@ -386,8 +386,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r0, r0, 24
 ; CHECK-PWR9-LE-NEXT:    sub r0, r30, r0
 ; CHECK-PWR9-LE-NEXT:    srawi r30, r0, 31
-; CHECK-PWR9-LE-NEXT:    add r0, r0, r30
 ; CHECK-PWR9-LE-NEXT:    xor r0, r0, r30
+; CHECK-PWR9-LE-NEXT:    sub r0, r0, r30
 ; CHECK-PWR9-LE-NEXT:    li r30, 11
 ; CHECK-PWR9-LE-NEXT:    vextubrx r29, r30, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r30, r30, v3
@@ -395,8 +395,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r30, r30, 24
 ; CHECK-PWR9-LE-NEXT:    sub r30, r29, r30
 ; CHECK-PWR9-LE-NEXT:    srawi r29, r30, 31
-; CHECK-PWR9-LE-NEXT:    add r30, r30, r29
 ; CHECK-PWR9-LE-NEXT:    xor r30, r30, r29
+; CHECK-PWR9-LE-NEXT:    sub r30, r30, r29
 ; CHECK-PWR9-LE-NEXT:    li r29, 12
 ; CHECK-PWR9-LE-NEXT:    vextubrx r28, r29, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r29, r29, v3
@@ -404,8 +404,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r29, r29, 24
 ; CHECK-PWR9-LE-NEXT:    sub r29, r28, r29
 ; CHECK-PWR9-LE-NEXT:    srawi r28, r29, 31
-; CHECK-PWR9-LE-NEXT:    add r29, r29, r28
 ; CHECK-PWR9-LE-NEXT:    xor r29, r29, r28
+; CHECK-PWR9-LE-NEXT:    sub r29, r29, r28
 ; CHECK-PWR9-LE-NEXT:    li r28, 13
 ; CHECK-PWR9-LE-NEXT:    vextubrx r27, r28, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r28, r28, v3
@@ -413,8 +413,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r28, r28, 24
 ; CHECK-PWR9-LE-NEXT:    sub r28, r27, r28
 ; CHECK-PWR9-LE-NEXT:    srawi r27, r28, 31
-; CHECK-PWR9-LE-NEXT:    add r28, r28, r27
 ; CHECK-PWR9-LE-NEXT:    xor r28, r28, r27
+; CHECK-PWR9-LE-NEXT:    sub r28, r28, r27
 ; CHECK-PWR9-LE-NEXT:    li r27, 14
 ; CHECK-PWR9-LE-NEXT:    vextubrx r26, r27, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r27, r27, v3
@@ -422,8 +422,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    clrlwi r27, r27, 24
 ; CHECK-PWR9-LE-NEXT:    sub r27, r26, r27
 ; CHECK-PWR9-LE-NEXT:    srawi r26, r27, 31
-; CHECK-PWR9-LE-NEXT:    add r27, r27, r26
 ; CHECK-PWR9-LE-NEXT:    xor r27, r27, r26
+; CHECK-PWR9-LE-NEXT:    sub r27, r27, r26
 ; CHECK-PWR9-LE-NEXT:    li r26, 15
 ; CHECK-PWR9-LE-NEXT:    vextubrx r25, r26, v2
 ; CHECK-PWR9-LE-NEXT:    vextubrx r26, r26, v3
@@ -441,10 +441,10 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-LE-NEXT:    srawi r25, r26, 31
 ; CHECK-PWR9-LE-NEXT:    vmrghb v3, v4, v3
 ; CHECK-PWR9-LE-NEXT:    mtvsrd v4, r9
-; CHECK-PWR9-LE-NEXT:    add r26, r26, r25
+; CHECK-PWR9-LE-NEXT:    xor r26, r26, r25
 ; CHECK-PWR9-LE-NEXT:    vmrghb v4, v5, v4
 ; CHECK-PWR9-LE-NEXT:    mtvsrd v5, r30
-; CHECK-PWR9-LE-NEXT:    xor r26, r26, r25
+; CHECK-PWR9-LE-NEXT:    sub r26, r26, r25
 ; CHECK-PWR9-LE-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-LE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-LE-NEXT:    mtvsrd v0, r26
@@ -499,13 +499,13 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    srawi r6, r3, 31
 ; CHECK-PWR9-BE-NEXT:    srawi r7, r4, 31
 ; CHECK-PWR9-BE-NEXT:    srawi r8, r5, 31
-; CHECK-PWR9-BE-NEXT:    add r3, r3, r6
-; CHECK-PWR9-BE-NEXT:    add r4, r4, r7
-; CHECK-PWR9-BE-NEXT:    add r5, r5, r8
 ; CHECK-PWR9-BE-NEXT:    xor r3, r3, r6
-; CHECK-PWR9-BE-NEXT:    li r6, 3
 ; CHECK-PWR9-BE-NEXT:    xor r4, r4, r7
 ; CHECK-PWR9-BE-NEXT:    xor r5, r5, r8
+; CHECK-PWR9-BE-NEXT:    sub r3, r3, r6
+; CHECK-PWR9-BE-NEXT:    li r6, 3
+; CHECK-PWR9-BE-NEXT:    sub r4, r4, r7
+; CHECK-PWR9-BE-NEXT:    sub r5, r5, r8
 ; CHECK-PWR9-BE-NEXT:    vextublx r7, r6, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r6, r6, v3
 ; CHECK-PWR9-BE-NEXT:    mtvsrwz v1, r3
@@ -513,8 +513,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r6, r6, 24
 ; CHECK-PWR9-BE-NEXT:    sub r6, r7, r6
 ; CHECK-PWR9-BE-NEXT:    srawi r7, r6, 31
-; CHECK-PWR9-BE-NEXT:    add r6, r6, r7
 ; CHECK-PWR9-BE-NEXT:    xor r6, r6, r7
+; CHECK-PWR9-BE-NEXT:    sub r6, r6, r7
 ; CHECK-PWR9-BE-NEXT:    li r7, 4
 ; CHECK-PWR9-BE-NEXT:    vextublx r8, r7, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r7, r7, v3
@@ -522,8 +522,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r7, r7, 24
 ; CHECK-PWR9-BE-NEXT:    sub r7, r8, r7
 ; CHECK-PWR9-BE-NEXT:    srawi r8, r7, 31
-; CHECK-PWR9-BE-NEXT:    add r7, r7, r8
 ; CHECK-PWR9-BE-NEXT:    xor r7, r7, r8
+; CHECK-PWR9-BE-NEXT:    sub r7, r7, r8
 ; CHECK-PWR9-BE-NEXT:    li r8, 5
 ; CHECK-PWR9-BE-NEXT:    vextublx r9, r8, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r8, r8, v3
@@ -531,8 +531,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r8, r8, 24
 ; CHECK-PWR9-BE-NEXT:    sub r8, r9, r8
 ; CHECK-PWR9-BE-NEXT:    srawi r9, r8, 31
-; CHECK-PWR9-BE-NEXT:    add r8, r8, r9
 ; CHECK-PWR9-BE-NEXT:    xor r8, r8, r9
+; CHECK-PWR9-BE-NEXT:    sub r8, r8, r9
 ; CHECK-PWR9-BE-NEXT:    li r9, 6
 ; CHECK-PWR9-BE-NEXT:    vextublx r10, r9, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r9, r9, v3
@@ -540,8 +540,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r9, r9, 24
 ; CHECK-PWR9-BE-NEXT:    sub r9, r10, r9
 ; CHECK-PWR9-BE-NEXT:    srawi r10, r9, 31
-; CHECK-PWR9-BE-NEXT:    add r9, r9, r10
 ; CHECK-PWR9-BE-NEXT:    xor r9, r9, r10
+; CHECK-PWR9-BE-NEXT:    sub r9, r9, r10
 ; CHECK-PWR9-BE-NEXT:    li r10, 7
 ; CHECK-PWR9-BE-NEXT:    vextublx r11, r10, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r10, r10, v3
@@ -549,8 +549,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r10, r10, 24
 ; CHECK-PWR9-BE-NEXT:    sub r10, r11, r10
 ; CHECK-PWR9-BE-NEXT:    srawi r11, r10, 31
-; CHECK-PWR9-BE-NEXT:    add r10, r10, r11
 ; CHECK-PWR9-BE-NEXT:    xor r10, r10, r11
+; CHECK-PWR9-BE-NEXT:    sub r10, r10, r11
 ; CHECK-PWR9-BE-NEXT:    li r11, 8
 ; CHECK-PWR9-BE-NEXT:    vextublx r12, r11, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r11, r11, v3
@@ -558,8 +558,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r11, r11, 24
 ; CHECK-PWR9-BE-NEXT:    sub r11, r12, r11
 ; CHECK-PWR9-BE-NEXT:    srawi r12, r11, 31
-; CHECK-PWR9-BE-NEXT:    add r11, r11, r12
 ; CHECK-PWR9-BE-NEXT:    xor r11, r11, r12
+; CHECK-PWR9-BE-NEXT:    sub r11, r11, r12
 ; CHECK-PWR9-BE-NEXT:    li r12, 9
 ; CHECK-PWR9-BE-NEXT:    vextublx r0, r12, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r12, r12, v3
@@ -568,8 +568,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r12, r12, 24
 ; CHECK-PWR9-BE-NEXT:    sub r12, r0, r12
 ; CHECK-PWR9-BE-NEXT:    srawi r0, r12, 31
-; CHECK-PWR9-BE-NEXT:    add r12, r12, r0
 ; CHECK-PWR9-BE-NEXT:    xor r12, r12, r0
+; CHECK-PWR9-BE-NEXT:    sub r12, r12, r0
 ; CHECK-PWR9-BE-NEXT:    li r0, 10
 ; CHECK-PWR9-BE-NEXT:    vextublx r30, r0, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r0, r0, v3
@@ -577,8 +577,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r0, r0, 24
 ; CHECK-PWR9-BE-NEXT:    sub r0, r30, r0
 ; CHECK-PWR9-BE-NEXT:    srawi r30, r0, 31
-; CHECK-PWR9-BE-NEXT:    add r0, r0, r30
 ; CHECK-PWR9-BE-NEXT:    xor r0, r0, r30
+; CHECK-PWR9-BE-NEXT:    sub r0, r0, r30
 ; CHECK-PWR9-BE-NEXT:    li r30, 11
 ; CHECK-PWR9-BE-NEXT:    vextublx r29, r30, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r30, r30, v3
@@ -586,8 +586,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r30, r30, 24
 ; CHECK-PWR9-BE-NEXT:    sub r30, r29, r30
 ; CHECK-PWR9-BE-NEXT:    srawi r29, r30, 31
-; CHECK-PWR9-BE-NEXT:    add r30, r30, r29
 ; CHECK-PWR9-BE-NEXT:    xor r30, r30, r29
+; CHECK-PWR9-BE-NEXT:    sub r30, r30, r29
 ; CHECK-PWR9-BE-NEXT:    li r29, 12
 ; CHECK-PWR9-BE-NEXT:    vextublx r28, r29, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r29, r29, v3
@@ -595,8 +595,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r29, r29, 24
 ; CHECK-PWR9-BE-NEXT:    sub r29, r28, r29
 ; CHECK-PWR9-BE-NEXT:    srawi r28, r29, 31
-; CHECK-PWR9-BE-NEXT:    add r29, r29, r28
 ; CHECK-PWR9-BE-NEXT:    xor r29, r29, r28
+; CHECK-PWR9-BE-NEXT:    sub r29, r29, r28
 ; CHECK-PWR9-BE-NEXT:    li r28, 13
 ; CHECK-PWR9-BE-NEXT:    vextublx r27, r28, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r28, r28, v3
@@ -606,8 +606,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r28, r28, 24
 ; CHECK-PWR9-BE-NEXT:    sub r28, r27, r28
 ; CHECK-PWR9-BE-NEXT:    srawi r27, r28, 31
-; CHECK-PWR9-BE-NEXT:    add r28, r28, r27
 ; CHECK-PWR9-BE-NEXT:    xor r28, r28, r27
+; CHECK-PWR9-BE-NEXT:    sub r28, r28, r27
 ; CHECK-PWR9-BE-NEXT:    li r27, 14
 ; CHECK-PWR9-BE-NEXT:    vextublx r26, r27, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r27, r27, v3
@@ -615,8 +615,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    clrlwi r27, r27, 24
 ; CHECK-PWR9-BE-NEXT:    sub r27, r26, r27
 ; CHECK-PWR9-BE-NEXT:    srawi r26, r27, 31
-; CHECK-PWR9-BE-NEXT:    add r27, r27, r26
 ; CHECK-PWR9-BE-NEXT:    xor r27, r27, r26
+; CHECK-PWR9-BE-NEXT:    sub r27, r27, r26
 ; CHECK-PWR9-BE-NEXT:    li r26, 15
 ; CHECK-PWR9-BE-NEXT:    vextublx r25, r26, v2
 ; CHECK-PWR9-BE-NEXT:    vextublx r26, r26, v3
@@ -629,8 +629,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR9-BE-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-BE-NEXT:    sub r26, r25, r26
 ; CHECK-PWR9-BE-NEXT:    srawi r25, r26, 31
-; CHECK-PWR9-BE-NEXT:    add r26, r26, r25
 ; CHECK-PWR9-BE-NEXT:    xor r26, r26, r25
+; CHECK-PWR9-BE-NEXT:    sub r26, r26, r25
 ; CHECK-PWR9-BE-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
 ; CHECK-PWR9-BE-NEXT:    mtvsrwz v2, r26
 ; CHECK-PWR9-BE-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
@@ -707,25 +707,25 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    srawi r12, r7, 31
 ; CHECK-PWR8-NEXT:    clrlwi r10, r0, 24
 ; CHECK-PWR8-NEXT:    clrlwi r0, r30, 24
-; CHECK-PWR8-NEXT:    add r4, r4, r3
-; CHECK-PWR8-NEXT:    add r7, r7, r12
+; CHECK-PWR8-NEXT:    xor r4, r4, r3
+; CHECK-PWR8-NEXT:    xor r7, r7, r12
 ; CHECK-PWR8-NEXT:    sub r10, r10, r0
 ; CHECK-PWR8-NEXT:    std r20, -96(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std r21, -88(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    xor r3, r4, r3
+; CHECK-PWR8-NEXT:    sub r3, r4, r3
 ; CHECK-PWR8-NEXT:    srawi r4, r9, 31
-; CHECK-PWR8-NEXT:    xor r7, r7, r12
+; CHECK-PWR8-NEXT:    sub r7, r7, r12
 ; CHECK-PWR8-NEXT:    std r22, -80(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    rldicl r29, r5, 24, 56
 ; CHECK-PWR8-NEXT:    rldicl r28, r6, 24, 56
-; CHECK-PWR8-NEXT:    add r9, r9, r4
+; CHECK-PWR8-NEXT:    xor r9, r9, r4
 ; CHECK-PWR8-NEXT:    mtvsrd v3, r7
 ; CHECK-PWR8-NEXT:    rldicl r27, r5, 16, 56
 ; CHECK-PWR8-NEXT:    rldicl r25, r6, 16, 56
 ; CHECK-PWR8-NEXT:    clrlwi r30, r29, 24
 ; CHECK-PWR8-NEXT:    clrlwi r29, r28, 24
 ; CHECK-PWR8-NEXT:    mtvsrd v2, r3
-; CHECK-PWR8-NEXT:    xor r4, r9, r4
+; CHECK-PWR8-NEXT:    sub r4, r9, r4
 ; CHECK-PWR8-NEXT:    srawi r7, r10, 31
 ; CHECK-PWR8-NEXT:    srawi r3, r11, 31
 ; CHECK-PWR8-NEXT:    clrlwi r9, r27, 24
@@ -733,15 +733,15 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    sub r0, r30, r29
 ; CHECK-PWR8-NEXT:    mtvsrd v4, r4
 ; CHECK-PWR8-NEXT:    std r23, -72(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    add r10, r10, r7
-; CHECK-PWR8-NEXT:    add r11, r11, r3
+; CHECK-PWR8-NEXT:    xor r10, r10, r7
+; CHECK-PWR8-NEXT:    xor r11, r11, r3
 ; CHECK-PWR8-NEXT:    sub r9, r9, r12
 ; CHECK-PWR8-NEXT:    std r18, -112(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    std r19, -104(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    vmrghb v2, v3, v2
-; CHECK-PWR8-NEXT:    xor r7, r10, r7
+; CHECK-PWR8-NEXT:    sub r7, r10, r7
 ; CHECK-PWR8-NEXT:    rldicl r5, r5, 8, 56
-; CHECK-PWR8-NEXT:    xor r3, r11, r3
+; CHECK-PWR8-NEXT:    sub r3, r11, r3
 ; CHECK-PWR8-NEXT:    rldicl r6, r6, 8, 56
 ; CHECK-PWR8-NEXT:    srawi r4, r0, 31
 ; CHECK-PWR8-NEXT:    mtvsrd v0, r7
@@ -754,13 +754,13 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    clrlwi r5, r6, 24
 ; CHECK-PWR8-NEXT:    clrldi r22, r24, 56
 ; CHECK-PWR8-NEXT:    rldicl r21, r26, 56, 56
-; CHECK-PWR8-NEXT:    add r10, r0, r4
-; CHECK-PWR8-NEXT:    add r9, r9, r7
+; CHECK-PWR8-NEXT:    xor r10, r0, r4
+; CHECK-PWR8-NEXT:    xor r9, r9, r7
 ; CHECK-PWR8-NEXT:    rldicl r20, r24, 56, 56
 ; CHECK-PWR8-NEXT:    rldicl r19, r26, 48, 56
 ; CHECK-PWR8-NEXT:    sub r3, r3, r5
-; CHECK-PWR8-NEXT:    xor r4, r10, r4
-; CHECK-PWR8-NEXT:    xor r7, r9, r7
+; CHECK-PWR8-NEXT:    sub r4, r10, r4
+; CHECK-PWR8-NEXT:    sub r7, r9, r7
 ; CHECK-PWR8-NEXT:    clrlwi r9, r23, 24
 ; CHECK-PWR8-NEXT:    rldicl r18, r24, 48, 56
 ; CHECK-PWR8-NEXT:    clrlwi r10, r22, 24
@@ -779,7 +779,7 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    clrlwi r12, r18, 24
 ; CHECK-PWR8-NEXT:    vmrghb v4, v5, v4
 ; CHECK-PWR8-NEXT:    std r31, -8(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT:    add r3, r3, r4
+; CHECK-PWR8-NEXT:    xor r3, r3, r4
 ; CHECK-PWR8-NEXT:    sub r7, r11, r12
 ; CHECK-PWR8-NEXT:    clrlwi r11, r17, 24
 ; CHECK-PWR8-NEXT:    clrlwi r12, r16, 24
@@ -787,7 +787,7 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    std r2, -152(r1) # 8-byte Folded Spill
 ; CHECK-PWR8-NEXT:    rldicl r15, r26, 32, 56
 ; CHECK-PWR8-NEXT:    rldicl r14, r24, 32, 56
-; CHECK-PWR8-NEXT:    xor r3, r3, r4
+; CHECK-PWR8-NEXT:    sub r3, r3, r4
 ; CHECK-PWR8-NEXT:    sub r11, r11, r12
 ; CHECK-PWR8-NEXT:    srawi r4, r9, 31
 ; CHECK-PWR8-NEXT:    srawi r12, r10, 31
@@ -795,40 +795,40 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    clrlwi r30, r14, 24
 ; CHECK-PWR8-NEXT:    mtvsrd v5, r3
 ; CHECK-PWR8-NEXT:    ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    add r9, r9, r4
-; CHECK-PWR8-NEXT:    add r10, r10, r12
+; CHECK-PWR8-NEXT:    xor r9, r9, r4
+; CHECK-PWR8-NEXT:    xor r10, r10, r12
 ; CHECK-PWR8-NEXT:    sub r3, r0, r30
 ; CHECK-PWR8-NEXT:    ld r25, -56(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    ld r23, -72(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    ld r22, -80(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    srawi r28, r11, 31
-; CHECK-PWR8-NEXT:    xor r4, r9, r4
-; CHECK-PWR8-NEXT:    xor r10, r10, r12
+; CHECK-PWR8-NEXT:    sub r4, r9, r4
+; CHECK-PWR8-NEXT:    sub r10, r10, r12
 ; CHECK-PWR8-NEXT:    vmrghb v3, v5, v3
 ; CHECK-PWR8-NEXT:    ld r21, -88(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    ld r20, -96(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    srawi r29, r7, 31
 ; CHECK-PWR8-NEXT:    srawi r9, r3, 31
 ; CHECK-PWR8-NEXT:    mtvsrd v5, r4
-; CHECK-PWR8-NEXT:    add r4, r11, r28
+; CHECK-PWR8-NEXT:    xor r4, r11, r28
 ; CHECK-PWR8-NEXT:    ld r19, -104(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    ld r18, -112(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    mtvsrd v1, r10
 ; CHECK-PWR8-NEXT:    ld r10, -160(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    rldicl r31, r26, 24, 56
 ; CHECK-PWR8-NEXT:    rldicl r2, r24, 24, 56
-; CHECK-PWR8-NEXT:    add r7, r7, r29
-; CHECK-PWR8-NEXT:    add r3, r3, r9
+; CHECK-PWR8-NEXT:    xor r7, r7, r29
+; CHECK-PWR8-NEXT:    xor r3, r3, r9
 ; CHECK-PWR8-NEXT:    rldicl r8, r24, 16, 56
 ; CHECK-PWR8-NEXT:    rldicl r6, r26, 8, 56
-; CHECK-PWR8-NEXT:    xor r4, r4, r28
+; CHECK-PWR8-NEXT:    sub r4, r4, r28
 ; CHECK-PWR8-NEXT:    clrlwi r0, r31, 24
 ; CHECK-PWR8-NEXT:    clrlwi r30, r2, 24
-; CHECK-PWR8-NEXT:    xor r7, r7, r29
+; CHECK-PWR8-NEXT:    sub r7, r7, r29
 ; CHECK-PWR8-NEXT:    rldicl r5, r24, 8, 56
 ; CHECK-PWR8-NEXT:    clrlwi r10, r10, 24
 ; CHECK-PWR8-NEXT:    clrlwi r8, r8, 24
-; CHECK-PWR8-NEXT:    xor r3, r3, r9
+; CHECK-PWR8-NEXT:    sub r3, r3, r9
 ; CHECK-PWR8-NEXT:    mtvsrd v7, r4
 ; CHECK-PWR8-NEXT:    clrlwi r4, r6, 24
 ; CHECK-PWR8-NEXT:    clrlwi r5, r5, 24
@@ -845,18 +845,18 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR8-NEXT:    ld r28, -32(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    srawi r6, r7, 31
 ; CHECK-PWR8-NEXT:    srawi r5, r3, 31
-; CHECK-PWR8-NEXT:    add r8, r0, r12
+; CHECK-PWR8-NEXT:    xor r8, r0, r12
 ; CHECK-PWR8-NEXT:    vmrghb v5, v1, v5
 ; CHECK-PWR8-NEXT:    ld r26, -48(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    ld r24, -64(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    add r4, r7, r6
-; CHECK-PWR8-NEXT:    add r3, r3, r5
-; CHECK-PWR8-NEXT:    xor r8, r8, r12
+; CHECK-PWR8-NEXT:    xor r4, r7, r6
+; CHECK-PWR8-NEXT:    xor r3, r3, r5
+; CHECK-PWR8-NEXT:    sub r8, r8, r12
 ; CHECK-PWR8-NEXT:    vmrghb v6, v7, v6
 ; CHECK-PWR8-NEXT:    ld r17, -120(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    ld r16, -128(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT:    xor r4, r4, r6
-; CHECK-PWR8-NEXT:    xor r3, r3, r5
+; CHECK-PWR8-NEXT:    sub r4, r4, r6
+; CHECK-PWR8-NEXT:    sub r3, r3, r5
 ; CHECK-PWR8-NEXT:    mtvsrd v9, r8
 ; CHECK-PWR8-NEXT:    ld r15, -136(r1) # 8-byte Folded Reload
 ; CHECK-PWR8-NEXT:    ld r14, -144(r1) # 8-byte Folded Reload
@@ -875,15 +875,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ;
 ; CHECK-PWR7-LABEL: sub_absv_8_ext:
 ; CHECK-PWR7:       # %bb.0: # %entry
-; CHECK-PWR7-NEXT:    stdu r1, -464(r1)
-; CHECK-PWR7-NEXT:    .cfi_def_cfa_offset 464
-; CHECK-PWR7-NEXT:    .cfi_offset r16, -128
-; CHECK-PWR7-NEXT:    .cfi_offset r17, -120
-; CHECK-PWR7-NEXT:    .cfi_offset r18, -112
-; CHECK-PWR7-NEXT:    .cfi_offset r19, -104
-; CHECK-PWR7-NEXT:    .cfi_offset r20, -96
-; CHECK-PWR7-NEXT:    .cfi_offset r21, -88
-; CHECK-PWR7-NEXT:    .cfi_offset r22, -80
+; CHECK-PWR7-NEXT:    stdu r1, -416(r1)
+; CHECK-PWR7-NEXT:    .cfi_def_cfa_offset 416
 ; CHECK-PWR7-NEXT:    .cfi_offset r23, -72
 ; CHECK-PWR7-NEXT:    .cfi_offset r24, -64
 ; CHECK-PWR7-NEXT:    .cfi_offset r25, -56
@@ -893,167 +886,156 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR7-NEXT:    .cfi_offset r29, -24
 ; CHECK-PWR7-NEXT:    .cfi_offset r30, -16
 ; CHECK-PWR7-NEXT:    addi r3, r1, 304
-; CHECK-PWR7-NEXT:    std r16, 336(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r23, 344(r1) # 8-byte Folded Spill
 ; CHECK-PWR7-NEXT:    addi r4, r1, 320
-; CHECK-PWR7-NEXT:    std r17, 344(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r18, 352(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r19, 360(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r20, 368(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r21, 376(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r22, 384(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r23, 392(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r24, 400(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r25, 408(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r26, 416(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r27, 424(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r28, 432(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r29, 440(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT:    std r30, 448(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r24, 352(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r25, 360(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r26, 368(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r27, 376(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r28, 384(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r29, 392(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT:    std r30, 400(r1) # 8-byte Folded Spill
 ; CHECK-PWR7-NEXT:    stxvw4x v2, 0, r3
 ; CHECK-PWR7-NEXT:    lbz r3, 304(r1)
 ; CHECK-PWR7-NEXT:    stxvw4x v3, 0, r4
-; CHECK-PWR7-NEXT:    lbz r4, 320(r1)
-; CHECK-PWR7-NEXT:    lbz r5, 305(r1)
-; CHECK-PWR7-NEXT:    lbz r6, 321(r1)
-; CHECK-PWR7-NEXT:    lbz r7, 306(r1)
-; CHECK-PWR7-NEXT:    lbz r8, 322(r1)
 ; CHECK-PWR7-NEXT:    lbz r9, 307(r1)
-; CHECK-PWR7-NEXT:    sub r3, r3, r4
 ; CHECK-PWR7-NEXT:    lbz r10, 323(r1)
 ; CHECK-PWR7-NEXT:    lbz r11, 308(r1)
-; CHECK-PWR7-NEXT:    sub r5, r5, r6
 ; CHECK-PWR7-NEXT:    lbz r12, 324(r1)
 ; CHECK-PWR7-NEXT:    lbz r0, 309(r1)
-; CHECK-PWR7-NEXT:    sub r6, r7, r8
 ; CHECK-PWR7-NEXT:    lbz r30, 325(r1)
-; CHECK-PWR7-NEXT:    lbz r29, 310(r1)
 ; CHECK-PWR7-NEXT:    sub r9, r9, r10
+; CHECK-PWR7-NEXT:    lbz r29, 310(r1)
 ; CHECK-PWR7-NEXT:    lbz r28, 326(r1)
-; CHECK-PWR7-NEXT:    lbz r23, 313(r1)
-; CHECK-PWR7-NEXT:    sub r10, r11, r12
-; CHECK-PWR7-NEXT:    lbz r22, 329(r1)
-; CHECK-PWR7-NEXT:    lbz r4, 314(r1)
-; CHECK-PWR7-NEXT:    sub r0, r0, r30
-; CHECK-PWR7-NEXT:    lbz r21, 330(r1)
-; CHECK-PWR7-NEXT:    lbz r7, 315(r1)
-; CHECK-PWR7-NEXT:    sub r30, r29, r28
-; CHECK-PWR7-NEXT:    srawi r20, r0, 31
-; CHECK-PWR7-NEXT:    lbz r8, 331(r1)
-; CHECK-PWR7-NEXT:    lbz r11, 316(r1)
-; CHECK-PWR7-NEXT:    sub r23, r23, r22
-; CHECK-PWR7-NEXT:    srawi r19, r30, 31
-; CHECK-PWR7-NEXT:    lbz r12, 332(r1)
-; CHECK-PWR7-NEXT:    lbz r29, 317(r1)
-; CHECK-PWR7-NEXT:    sub r4, r4, r21
-; CHECK-PWR7-NEXT:    add r0, r0, r20
-; CHECK-PWR7-NEXT:    lbz r28, 333(r1)
-; CHECK-PWR7-NEXT:    lbz r22, 319(r1)
-; CHECK-PWR7-NEXT:    sub r7, r7, r8
-; CHECK-PWR7-NEXT:    add r30, r30, r19
-; CHECK-PWR7-NEXT:    lbz r21, 335(r1)
+; CHECK-PWR7-NEXT:    sub r11, r11, r12
 ; CHECK-PWR7-NEXT:    lbz r27, 311(r1)
-; CHECK-PWR7-NEXT:    sub r8, r11, r12
-; CHECK-PWR7-NEXT:    xor r0, r0, r20
 ; CHECK-PWR7-NEXT:    lbz r26, 327(r1)
+; CHECK-PWR7-NEXT:    sub r0, r0, r30
 ; CHECK-PWR7-NEXT:    lbz r25, 312(r1)
-; CHECK-PWR7-NEXT:    sub r11, r29, r28
-; CHECK-PWR7-NEXT:    srawi r28, r3, 31
 ; CHECK-PWR7-NEXT:    lbz r24, 328(r1)
-; CHECK-PWR7-NEXT:    sub r29, r22, r21
-; CHECK-PWR7-NEXT:    add r3, r3, r28
-; CHECK-PWR7-NEXT:    xor r30, r30, r19
+; CHECK-PWR7-NEXT:    sub r29, r29, r28
+; CHECK-PWR7-NEXT:    lbz r10, 315(r1)
+; CHECK-PWR7-NEXT:    lbz r12, 331(r1)
 ; CHECK-PWR7-NEXT:    sub r27, r27, r26
-; CHECK-PWR7-NEXT:    srawi r17, r29, 31
+; CHECK-PWR7-NEXT:    lbz r30, 316(r1)
+; CHECK-PWR7-NEXT:    lbz r28, 332(r1)
+; CHECK-PWR7-NEXT:    sub r25, r25, r24
+; CHECK-PWR7-NEXT:    lbz r4, 320(r1)
+; CHECK-PWR7-NEXT:    lbz r5, 305(r1)
+; CHECK-PWR7-NEXT:    sub r10, r10, r12
+; CHECK-PWR7-NEXT:    lbz r6, 321(r1)
+; CHECK-PWR7-NEXT:    lbz r26, 317(r1)
+; CHECK-PWR7-NEXT:    sub r30, r30, r28
+; CHECK-PWR7-NEXT:    lbz r24, 333(r1)
+; CHECK-PWR7-NEXT:    lbz r12, 319(r1)
+; CHECK-PWR7-NEXT:    sub r3, r3, r4
+; CHECK-PWR7-NEXT:    lbz r28, 335(r1)
+; CHECK-PWR7-NEXT:    lbz r7, 306(r1)
+; CHECK-PWR7-NEXT:    sub r5, r5, r6
+; CHECK-PWR7-NEXT:    lbz r8, 322(r1)
+; CHECK-PWR7-NEXT:    sub r26, r26, r24
+; CHECK-PWR7-NEXT:    srawi r24, r5, 31
+; CHECK-PWR7-NEXT:    lbz r23, 313(r1)
+; CHECK-PWR7-NEXT:    sub r12, r12, r28
+; CHECK-PWR7-NEXT:    srawi r28, r3, 31
+; CHECK-PWR7-NEXT:    xor r5, r5, r24
+; CHECK-PWR7-NEXT:    lbz r4, 329(r1)
+; CHECK-PWR7-NEXT:    sub r7, r7, r8
 ; CHECK-PWR7-NEXT:    xor r3, r3, r28
-; CHECK-PWR7-NEXT:    ld r20, 368(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    sub r26, r25, r24
-; CHECK-PWR7-NEXT:    lbz r25, 318(r1)
-; CHECK-PWR7-NEXT:    lbz r24, 334(r1)
-; CHECK-PWR7-NEXT:    add r29, r29, r17
-; CHECK-PWR7-NEXT:    xor r29, r29, r17
-; CHECK-PWR7-NEXT:    srawi r18, r27, 31
-; CHECK-PWR7-NEXT:    ld r19, 360(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    sub r12, r25, r24
-; CHECK-PWR7-NEXT:    stb r29, 288(r1)
-; CHECK-PWR7-NEXT:    add r28, r27, r18
-; CHECK-PWR7-NEXT:    srawi r29, r12, 31
-; CHECK-PWR7-NEXT:    srawi r16, r26, 31
-; CHECK-PWR7-NEXT:    xor r28, r28, r18
-; CHECK-PWR7-NEXT:    ld r18, 352(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    add r12, r12, r29
-; CHECK-PWR7-NEXT:    add r27, r26, r16
-; CHECK-PWR7-NEXT:    xor r12, r12, r29
-; CHECK-PWR7-NEXT:    srawi r29, r7, 31
-; CHECK-PWR7-NEXT:    xor r27, r27, r16
-; CHECK-PWR7-NEXT:    ld r16, 336(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    srawi r26, r8, 31
-; CHECK-PWR7-NEXT:    srawi r25, r5, 31
-; CHECK-PWR7-NEXT:    add r7, r7, r29
-; CHECK-PWR7-NEXT:    add r8, r8, r26
-; CHECK-PWR7-NEXT:    srawi r24, r6, 31
-; CHECK-PWR7-NEXT:    add r5, r5, r25
-; CHECK-PWR7-NEXT:    xor r7, r7, r29
-; CHECK-PWR7-NEXT:    srawi r22, r9, 31
-; CHECK-PWR7-NEXT:    srawi r21, r10, 31
-; CHECK-PWR7-NEXT:    xor r8, r8, r26
-; CHECK-PWR7-NEXT:    xor r5, r5, r25
-; CHECK-PWR7-NEXT:    srawi r17, r11, 31
-; CHECK-PWR7-NEXT:    srawi r26, r23, 31
-; CHECK-PWR7-NEXT:    add r6, r6, r24
-; CHECK-PWR7-NEXT:    add r9, r9, r22
-; CHECK-PWR7-NEXT:    srawi r29, r4, 31
-; CHECK-PWR7-NEXT:    add r10, r10, r21
-; CHECK-PWR7-NEXT:    add r11, r11, r17
-; CHECK-PWR7-NEXT:    add r25, r23, r26
-; CHECK-PWR7-NEXT:    add r4, r4, r29
-; CHECK-PWR7-NEXT:    xor r6, r6, r24
-; CHECK-PWR7-NEXT:    xor r9, r9, r22
-; CHECK-PWR7-NEXT:    xor r10, r10, r21
-; CHECK-PWR7-NEXT:    xor r11, r11, r17
-; CHECK-PWR7-NEXT:    xor r4, r4, r29
-; CHECK-PWR7-NEXT:    xor r26, r25, r26
-; CHECK-PWR7-NEXT:    addi r29, r1, 224
-; CHECK-PWR7-NEXT:    stb r12, 272(r1)
+; CHECK-PWR7-NEXT:    lbz r6, 314(r1)
+; CHECK-PWR7-NEXT:    lbz r8, 330(r1)
+; CHECK-PWR7-NEXT:    sub r3, r3, r28
+; CHECK-PWR7-NEXT:    srawi r28, r7, 31
+; CHECK-PWR7-NEXT:    sub r5, r5, r24
+; CHECK-PWR7-NEXT:    srawi r24, r9, 31
+; CHECK-PWR7-NEXT:    xor r7, r7, r28
+; CHECK-PWR7-NEXT:    xor r9, r9, r24
+; CHECK-PWR7-NEXT:    sub r7, r7, r28
+; CHECK-PWR7-NEXT:    srawi r28, r11, 31
+; CHECK-PWR7-NEXT:    sub r9, r9, r24
+; CHECK-PWR7-NEXT:    srawi r24, r0, 31
+; CHECK-PWR7-NEXT:    xor r11, r11, r28
+; CHECK-PWR7-NEXT:    xor r0, r0, r24
+; CHECK-PWR7-NEXT:    sub r11, r11, r28
+; CHECK-PWR7-NEXT:    srawi r28, r29, 31
+; CHECK-PWR7-NEXT:    sub r0, r0, r24
+; CHECK-PWR7-NEXT:    srawi r24, r27, 31
+; CHECK-PWR7-NEXT:    sub r4, r23, r4
+; CHECK-PWR7-NEXT:    xor r29, r29, r28
+; CHECK-PWR7-NEXT:    lbz r23, 318(r1)
+; CHECK-PWR7-NEXT:    xor r27, r27, r24
+; CHECK-PWR7-NEXT:    sub r29, r29, r28
+; CHECK-PWR7-NEXT:    srawi r28, r25, 31
+; CHECK-PWR7-NEXT:    sub r27, r27, r24
+; CHECK-PWR7-NEXT:    srawi r24, r4, 31
+; CHECK-PWR7-NEXT:    sub r6, r6, r8
+; CHECK-PWR7-NEXT:    xor r25, r25, r28
+; CHECK-PWR7-NEXT:    lbz r8, 334(r1)
+; CHECK-PWR7-NEXT:    xor r4, r4, r24
+; CHECK-PWR7-NEXT:    sub r28, r25, r28
+; CHECK-PWR7-NEXT:    srawi r25, r6, 31
+; CHECK-PWR7-NEXT:    sub r4, r4, r24
+; CHECK-PWR7-NEXT:    srawi r24, r10, 31
+; CHECK-PWR7-NEXT:    xor r6, r6, r25
+; CHECK-PWR7-NEXT:    xor r10, r10, r24
+; CHECK-PWR7-NEXT:    sub r6, r6, r25
+; CHECK-PWR7-NEXT:    srawi r25, r30, 31
+; CHECK-PWR7-NEXT:    sub r10, r10, r24
+; CHECK-PWR7-NEXT:    srawi r24, r26, 31
+; CHECK-PWR7-NEXT:    sub r8, r23, r8
+; CHECK-PWR7-NEXT:    xor r30, r30, r25
+; CHECK-PWR7-NEXT:    ld r23, 344(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    xor r26, r26, r24
+; CHECK-PWR7-NEXT:    sub r30, r30, r25
+; CHECK-PWR7-NEXT:    srawi r25, r12, 31
+; CHECK-PWR7-NEXT:    sub r26, r26, r24
+; CHECK-PWR7-NEXT:    srawi r24, r8, 31
+; CHECK-PWR7-NEXT:    xor r12, r12, r25
+; CHECK-PWR7-NEXT:    xor r8, r8, r24
+; CHECK-PWR7-NEXT:    sub r12, r12, r25
+; CHECK-PWR7-NEXT:    addi r25, r1, 272
+; CHECK-PWR7-NEXT:    sub r8, r8, r24
+; CHECK-PWR7-NEXT:    stb r12, 288(r1)
 ; CHECK-PWR7-NEXT:    addi r12, r1, 288
-; CHECK-PWR7-NEXT:    addi r25, r1, 208
-; CHECK-PWR7-NEXT:    stb r11, 256(r1)
-; CHECK-PWR7-NEXT:    addi r11, r1, 272
-; CHECK-PWR7-NEXT:    ld r24, 400(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    stb r8, 240(r1)
-; CHECK-PWR7-NEXT:    stb r7, 224(r1)
-; CHECK-PWR7-NEXT:    stb r4, 208(r1)
-; CHECK-PWR7-NEXT:    stb r26, 192(r1)
-; CHECK-PWR7-NEXT:    stb r27, 176(r1)
-; CHECK-PWR7-NEXT:    stb r28, 160(r1)
-; CHECK-PWR7-NEXT:    stb r30, 144(r1)
+; CHECK-PWR7-NEXT:    stb r8, 272(r1)
+; CHECK-PWR7-NEXT:    stb r26, 256(r1)
+; CHECK-PWR7-NEXT:    stb r30, 240(r1)
+; CHECK-PWR7-NEXT:    stb r10, 224(r1)
+; CHECK-PWR7-NEXT:    stb r6, 208(r1)
+; CHECK-PWR7-NEXT:    stb r4, 192(r1)
+; CHECK-PWR7-NEXT:    stb r28, 176(r1)
+; CHECK-PWR7-NEXT:    stb r27, 160(r1)
+; CHECK-PWR7-NEXT:    stb r29, 144(r1)
 ; CHECK-PWR7-NEXT:    stb r0, 128(r1)
-; CHECK-PWR7-NEXT:    stb r10, 112(r1)
+; CHECK-PWR7-NEXT:    stb r11, 112(r1)
 ; CHECK-PWR7-NEXT:    stb r9, 96(r1)
-; CHECK-PWR7-NEXT:    stb r6, 80(r1)
+; CHECK-PWR7-NEXT:    stb r7, 80(r1)
 ; CHECK-PWR7-NEXT:    stb r5, 64(r1)
 ; CHECK-PWR7-NEXT:    stb r3, 48(r1)
 ; CHECK-PWR7-NEXT:    addi r8, r1, 256
-; CHECK-PWR7-NEXT:    addi r7, r1, 240
+; CHECK-PWR7-NEXT:    addi r26, r1, 240
 ; CHECK-PWR7-NEXT:    lxvw4x v2, 0, r12
-; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r11
+; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r25
+; CHECK-PWR7-NEXT:    addi r10, r1, 224
+; CHECK-PWR7-NEXT:    addi r30, r1, 208
 ; CHECK-PWR7-NEXT:    addi r3, r1, 192
 ; CHECK-PWR7-NEXT:    addi r4, r1, 176
 ; CHECK-PWR7-NEXT:    addi r5, r1, 160
 ; CHECK-PWR7-NEXT:    addi r6, r1, 144
 ; CHECK-PWR7-NEXT:    lxvw4x v4, 0, r8
-; CHECK-PWR7-NEXT:    lxvw4x v5, 0, r7
-; CHECK-PWR7-NEXT:    lxvw4x v0, 0, r29
-; CHECK-PWR7-NEXT:    lxvw4x v1, 0, r25
+; CHECK-PWR7-NEXT:    lxvw4x v5, 0, r26
 ; CHECK-PWR7-NEXT:    addi r7, r1, 128
 ; CHECK-PWR7-NEXT:    addi r8, r1, 112
-; CHECK-PWR7-NEXT:    lxvw4x v6, 0, r3
-; CHECK-PWR7-NEXT:    lxvw4x v7, 0, r4
+; CHECK-PWR7-NEXT:    lxvw4x v0, 0, r10
+; CHECK-PWR7-NEXT:    lxvw4x v1, 0, r30
 ; CHECK-PWR7-NEXT:    vmrghb v2, v3, v2
 ; CHECK-PWR7-NEXT:    addi r9, r1, 96
-; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r5
-; CHECK-PWR7-NEXT:    lxvw4x v8, 0, r6
+; CHECK-PWR7-NEXT:    lxvw4x v6, 0, r3
+; CHECK-PWR7-NEXT:    lxvw4x v7, 0, r4
 ; CHECK-PWR7-NEXT:    addi r3, r1, 80
 ; CHECK-PWR7-NEXT:    addi r4, r1, 64
+; CHECK-PWR7-NEXT:    lxvw4x v3, 0, r5
+; CHECK-PWR7-NEXT:    lxvw4x v8, 0, r6
 ; CHECK-PWR7-NEXT:    addi r5, r1, 48
 ; CHECK-PWR7-NEXT:    vmrghb v4, v5, v4
 ; CHECK-PWR7-NEXT:    lxvw4x v5, 0, r7
@@ -1063,29 +1045,26 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
 ; CHECK-PWR7-NEXT:    lxvw4x v10, 0, r3
 ; CHECK-PWR7-NEXT:    vmrghb v6, v7, v6
 ; CHECK-PWR7-NEXT:    lxvw4x v7, 0, r4
-; CHECK-PWR7-NEXT:    ld r30, 448(r1) # 8-byte Folded Reload
 ; CHECK-PWR7-NEXT:    vmrghb v3, v8, v3
 ; CHECK-PWR7-NEXT:    lxvw4x v8, 0, r5
-; CHECK-PWR7-NEXT:    ld r29, 440(r1) # 8-byte Folded Reload
 ; CHECK-PWR7-NEXT:    vmrghb v5, v9, v5
-; CHECK-PWR7-NEXT:    ld r28, 432(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    ld r27, 424(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    ld r30, 400(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    ld r29, 392(r1) # 8-byte Folded Reload
 ; CHECK-PWR7-NEXT:    vmrghb v1, v10, v1
-; CHECK-PWR7-NEXT:    ld r26, 416(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    ld r25, 408(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    ld r28, 384(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    ld r27, 376(r1) # 8-byte Folded Reload
 ; CHECK-PWR7-NEXT:    vmrghb v7, v8, v7
-; CHECK-PWR7-NEXT:    ld r23, 392(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    ld r22, 384(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    ld r26, 368(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    ld r25, 360(r1) # 8-byte Folded Reload
 ; CHECK-PWR7-NEXT:    vmrghh v2, v4, v2
-; CHECK-PWR7-NEXT:    ld r21, 376(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT:    ld r17, 344(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT:    ld r24, 352(r1) # 8-byte Folded Reload
 ; CHECK-PWR7-NEXT:    vmrghh v4, v6, v0
 ; CHECK-PWR7-NEXT:    vmrghh v3, v5, v3
 ; CHECK-PWR7-NEXT:    vmrghh v5, v7, v1
 ; CHECK-PWR7-NEXT:    vmrghw v2, v4, v2
 ; CHECK-PWR7-NEXT:    vmrghw v3, v5, v3
 ; CHECK-PWR7-NEXT:    xxmrghd v2, v3, v2
-; CHECK-PWR7-NEXT:    addi r1, r1, 464
+; CHECK-PWR7-NEXT:    addi r1, r1, 416
 ; CHECK-PWR7-NEXT:    blr
 entry:
   %vecext = extractelement <16 x i8> %a, i32 0

diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 64c9e35146f63..29e481198246c 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -734,8 +734,8 @@ define i32 @abs_i32(i32 %x) {
 ; RV32I-LABEL: abs_i32:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    srai a1, a0, 31
-; RV32I-NEXT:    add a0, a0, a1
 ; RV32I-NEXT:    xor a0, a0, a1
+; RV32I-NEXT:    sub a0, a0, a1
 ; RV32I-NEXT:    ret
 ;
 ; RV32ZBB-LABEL: abs_i32:

diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index a5e3061f50953..0127ac4d33a5b 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -947,8 +947,8 @@ define i32 @abs_i32(i32 %x) {
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    sext.w a0, a0
 ; RV64I-NEXT:    srai a1, a0, 63
-; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abs_i32:
@@ -961,14 +961,13 @@ define i32 @abs_i32(i32 %x) {
   ret i32 %abs
 }
 
-; FIXME: We can remove the sext.w by using addw for RV64I and negw for RV64ZBB.
+; FIXME: We can remove the sext.w on RV64ZBB by using negw.
 define signext i32 @abs_i32_sext(i32 signext %x) {
 ; RV64I-LABEL: abs_i32_sext:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    srai a1, a0, 63
-; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    xor a0, a0, a1
-; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    subw a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abs_i32_sext:
@@ -987,8 +986,8 @@ define i64 @abs_i64(i64 %x) {
 ; RV64I-LABEL: abs_i64:
 ; RV64I:       # %bb.0:
 ; RV64I-NEXT:    srai a1, a0, 63
-; RV64I-NEXT:    add a0, a0, a1
 ; RV64I-NEXT:    xor a0, a0, a1
+; RV64I-NEXT:    sub a0, a0, a1
 ; RV64I-NEXT:    ret
 ;
 ; RV64ZBB-LABEL: abs_i64:

diff --git a/llvm/test/CodeGen/Thumb/iabs.ll b/llvm/test/CodeGen/Thumb/iabs.ll
index 2d51288b5242a..6bebea67e265a 100644
--- a/llvm/test/CodeGen/Thumb/iabs.ll
+++ b/llvm/test/CodeGen/Thumb/iabs.ll
@@ -6,8 +6,8 @@ define i8 @test_i8(i8 %a) nounwind {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    sxtb r1, r0
 ; CHECK-NEXT:    asrs r1, r1, #7
-; CHECK-NEXT:    adds r0, r0, r1
 ; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    subs r0, r0, r1
 ; CHECK-NEXT:    bx lr
   %tmp1neg = sub i8 0, %a
   %b = icmp sgt i8 %a, -1
@@ -20,8 +20,8 @@ define i16 @test_i16(i16 %a) nounwind {
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    sxth r1, r0
 ; CHECK-NEXT:    asrs r1, r1, #15
-; CHECK-NEXT:    adds r0, r0, r1
 ; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    subs r0, r0, r1
 ; CHECK-NEXT:    bx lr
   %tmp1neg = sub i16 0, %a
   %b = icmp sgt i16 %a, -1
@@ -33,8 +33,8 @@ define i32 @test_i32(i32 %a) nounwind {
 ; CHECK-LABEL: test_i32:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    asrs r1, r0, #31
-; CHECK-NEXT:    adds r0, r0, r1
 ; CHECK-NEXT:    eors r0, r1
+; CHECK-NEXT:    subs r0, r0, r1
 ; CHECK-NEXT:    bx lr
   %tmp1neg = sub i32 0, %a
   %b = icmp sgt i32 %a, -1
@@ -46,10 +46,10 @@ define i64 @test_i64(i64 %a) nounwind {
 ; CHECK-LABEL: test_i64:
 ; CHECK:       @ %bb.0:
 ; CHECK-NEXT:    asrs r2, r1, #31
-; CHECK-NEXT:    adds r0, r0, r2
-; CHECK-NEXT:    adcs r1, r2
-; CHECK-NEXT:    eors r0, r2
 ; CHECK-NEXT:    eors r1, r2
+; CHECK-NEXT:    eors r0, r2
+; CHECK-NEXT:    subs r0, r0, r2
+; CHECK-NEXT:    sbcs r1, r2
 ; CHECK-NEXT:    bx lr
   %tmp1neg = sub i64 0, %a
   %b = icmp sgt i64 %a, -1

diff --git a/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll b/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll
index bd091cf2b6f84..152ac0fa3f168 100644
--- a/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll
+++ b/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll
@@ -1,7 +1,51 @@
-; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv6-eabi %s -verify-machineinstrs -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s --check-prefix=THUMB
+; RUN: llc -mtriple=thumbv6-eabi %s -verify-machineinstrs -o - | FileCheck %s --check-prefix=THUMBV6
+
+; The scheduler used to ignore OptionalDefs, and could unwittingly insert
+; a flag-setting instruction in between an ADDS and the corresponding ADC.
+
+; FIXME: The ABS lowering changed to XOR followed by SUB so this may no longer
+; be testing what it used to.
 
 define i1 @test(i64 %arg) {
+; THUMB-LABEL: test:
+; THUMB:       @ %bb.0: @ %entry
+; THUMB-NEXT:    .save {r4, lr}
+; THUMB-NEXT:    push {r4, lr}
+; THUMB-NEXT:    asrs r2, r1, #31
+; THUMB-NEXT:    movs r3, r1
+; THUMB-NEXT:    eors r3, r2
+; THUMB-NEXT:    movs r4, r0
+; THUMB-NEXT:    eors r4, r2
+; THUMB-NEXT:    subs r4, r4, r2
+; THUMB-NEXT:    sbcs r3, r2
+; THUMB-NEXT:    eors r3, r1
+; THUMB-NEXT:    eors r0, r4
+; THUMB-NEXT:    orrs r0, r3
+; THUMB-NEXT:    rsbs r1, r0, #0
+; THUMB-NEXT:    adcs r0, r1
+; THUMB-NEXT:    pop {r4}
+; THUMB-NEXT:    pop {r1}
+; THUMB-NEXT:    bx r1
+;
+; THUMBV6-LABEL: test:
+; THUMBV6:       @ %bb.0: @ %entry
+; THUMBV6-NEXT:    .save {r4, lr}
+; THUMBV6-NEXT:    push {r4, lr}
+; THUMBV6-NEXT:    asrs r2, r1, #31
+; THUMBV6-NEXT:    mov r3, r1
+; THUMBV6-NEXT:    eors r3, r2
+; THUMBV6-NEXT:    mov r4, r0
+; THUMBV6-NEXT:    eors r4, r2
+; THUMBV6-NEXT:    subs r4, r4, r2
+; THUMBV6-NEXT:    sbcs r3, r2
+; THUMBV6-NEXT:    eors r3, r1
+; THUMBV6-NEXT:    eors r0, r4
+; THUMBV6-NEXT:    orrs r0, r3
+; THUMBV6-NEXT:    rsbs r1, r0, #0
+; THUMBV6-NEXT:    adcs r0, r1
+; THUMBV6-NEXT:    pop {r4, pc}
 entry:
   %ispos = icmp sgt i64 %arg, -1
   %neg = sub i64 0, %arg
@@ -9,10 +53,3 @@ entry:
   %cmp2 = icmp eq i64 %sel, %arg
   ret i1 %cmp2
 }
-
-; The scheduler used to ignore OptionalDefs, and could unwittingly insert
-; a flag-setting instruction in between an ADDS and the corresponding ADC.
-
-; CHECK: adds
-; CHECK-NOT: eors
-; CHECK: adcs

diff --git a/llvm/test/CodeGen/Thumb2/abs.ll b/llvm/test/CodeGen/Thumb2/abs.ll
index 02a2a14c2a5cc..88259ba758803 100644
--- a/llvm/test/CodeGen/Thumb2/abs.ll
+++ b/llvm/test/CodeGen/Thumb2/abs.ll
@@ -120,18 +120,18 @@ define i64 @abs64(i64 %x) {
 ; CHECKT1-LABEL: abs64:
 ; CHECKT1:       @ %bb.0:
 ; CHECKT1-NEXT:    asrs r2, r1, #31
-; CHECKT1-NEXT:    adds r0, r0, r2
-; CHECKT1-NEXT:    adcs r1, r2
-; CHECKT1-NEXT:    eors r0, r2
 ; CHECKT1-NEXT:    eors r1, r2
+; CHECKT1-NEXT:    eors r0, r2
+; CHECKT1-NEXT:    subs r0, r0, r2
+; CHECKT1-NEXT:    sbcs r1, r2
 ; CHECKT1-NEXT:    bx lr
 ;
 ; CHECKT2-LABEL: abs64:
 ; CHECKT2:       @ %bb.0:
-; CHECKT2-NEXT:    adds.w r0, r0, r1, asr #31
-; CHECKT2-NEXT:    adc.w r2, r1, r1, asr #31
 ; CHECKT2-NEXT:    eor.w r0, r0, r1, asr #31
-; CHECKT2-NEXT:    eor.w r1, r2, r1, asr #31
+; CHECKT2-NEXT:    eor.w r2, r1, r1, asr #31
+; CHECKT2-NEXT:    subs.w r0, r0, r1, asr #31
+; CHECKT2-NEXT:    sbc.w r1, r2, r1, asr #31
 ; CHECKT2-NEXT:    bx lr
   %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
   ret i64 %abs
@@ -141,8 +141,8 @@ define i32 @abs32(i32 %x) {
 ; CHECKT1-LABEL: abs32:
 ; CHECKT1:       @ %bb.0:
 ; CHECKT1-NEXT:    asrs r1, r0, #31
-; CHECKT1-NEXT:    adds r0, r0, r1
 ; CHECKT1-NEXT:    eors r0, r1
+; CHECKT1-NEXT:    subs r0, r0, r1
 ; CHECKT1-NEXT:    bx lr
 ;
 ; CHECKT2-LABEL: abs32:
@@ -160,15 +160,15 @@ define i16 @abs16(i16 %x) {
 ; CHECKT1:       @ %bb.0:
 ; CHECKT1-NEXT:    sxth r1, r0
 ; CHECKT1-NEXT:    asrs r1, r1, #15
-; CHECKT1-NEXT:    adds r0, r0, r1
 ; CHECKT1-NEXT:    eors r0, r1
+; CHECKT1-NEXT:    subs r0, r0, r1
 ; CHECKT1-NEXT:    bx lr
 ;
 ; CHECKT2-LABEL: abs16:
 ; CHECKT2:       @ %bb.0:
 ; CHECKT2-NEXT:    sxth r1, r0
-; CHECKT2-NEXT:    add.w r0, r0, r1, asr #15
 ; CHECKT2-NEXT:    eor.w r0, r0, r1, asr #15
+; CHECKT2-NEXT:    sub.w r0, r0, r1, asr #15
 ; CHECKT2-NEXT:    bx lr
   %abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true)
   ret i16 %abs
@@ -180,26 +180,26 @@ define i128 @abs128(i128 %x) {
 ; CHECKT1-NEXT:    .save {r4, lr}
 ; CHECKT1-NEXT:    push {r4, lr}
 ; CHECKT1-NEXT:    asrs r4, r3, #31
-; CHECKT1-NEXT:    adds r0, r0, r4
-; CHECKT1-NEXT:    adcs r1, r4
-; CHECKT1-NEXT:    adcs r2, r4
-; CHECKT1-NEXT:    adcs r3, r4
-; CHECKT1-NEXT:    eors r0, r4
-; CHECKT1-NEXT:    eors r1, r4
-; CHECKT1-NEXT:    eors r2, r4
 ; CHECKT1-NEXT:    eors r3, r4
+; CHECKT1-NEXT:    eors r2, r4
+; CHECKT1-NEXT:    eors r1, r4
+; CHECKT1-NEXT:    eors r0, r4
+; CHECKT1-NEXT:    subs r0, r0, r4
+; CHECKT1-NEXT:    sbcs r1, r4
+; CHECKT1-NEXT:    sbcs r2, r4
+; CHECKT1-NEXT:    sbcs r3, r4
 ; CHECKT1-NEXT:    pop {r4, pc}
 ;
 ; CHECKT2-LABEL: abs128:
 ; CHECKT2:       @ %bb.0:
-; CHECKT2-NEXT:    adds.w r0, r0, r3, asr #31
-; CHECKT2-NEXT:    adcs.w r1, r1, r3, asr #31
 ; CHECKT2-NEXT:    eor.w r0, r0, r3, asr #31
-; CHECKT2-NEXT:    adcs.w r2, r2, r3, asr #31
 ; CHECKT2-NEXT:    eor.w r1, r1, r3, asr #31
-; CHECKT2-NEXT:    adc.w r12, r3, r3, asr #31
+; CHECKT2-NEXT:    subs.w r0, r0, r3, asr #31
 ; CHECKT2-NEXT:    eor.w r2, r2, r3, asr #31
-; CHECKT2-NEXT:    eor.w r3, r12, r3, asr #31
+; CHECKT2-NEXT:    sbcs.w r1, r1, r3, asr #31
+; CHECKT2-NEXT:    eor.w r12, r3, r3, asr #31
+; CHECKT2-NEXT:    sbcs.w r2, r2, r3, asr #31
+; CHECKT2-NEXT:    sbc.w r3, r12, r3, asr #31
 ; CHECKT2-NEXT:    bx lr
   %abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true)
   ret i128 %abs

diff --git a/llvm/test/CodeGen/WebAssembly/PR41149.ll b/llvm/test/CodeGen/WebAssembly/PR41149.ll
index 6a8dee1906f1b..0913bf0eba220 100644
--- a/llvm/test/CodeGen/WebAssembly/PR41149.ll
+++ b/llvm/test/CodeGen/WebAssembly/PR41149.ll
@@ -13,9 +13,9 @@ define void @mod() {
 ; CHECK-NEXT: i32.const       31
 ; CHECK-NEXT: i32.shr_s
 ; CHECK-NEXT: local.tee       0
-; CHECK-NEXT: i32.add
-; CHECK-NEXT: local.get       0
 ; CHECK-NEXT: i32.xor
+; CHECK-NEXT: local.get       0
+; CHECK-NEXT: i32.sub
 ; CHECK-NEXT: i32.store8      0
   %tmp = load <4 x i8>, <4 x i8>* undef
   %tmp2 = icmp slt <4 x i8> %tmp, zeroinitializer

diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
index c03923aa47ff5..df83381ababd3 100644
--- a/llvm/test/CodeGen/X86/abs.ll
+++ b/llvm/test/CodeGen/X86/abs.ll
@@ -25,11 +25,11 @@ declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
 define i8 @test_i8(i8 %a) nounwind {
 ; X64-LABEL: test_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %eax, %ecx
 ; X64-NEXT:    sarb $7, %cl
-; X64-NEXT:    leal (%rdi,%rcx), %eax
 ; X64-NEXT:    xorb %cl, %al
+; X64-NEXT:    subb %cl, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
 ;
@@ -38,8 +38,8 @@ define i8 @test_i8(i8 %a) nounwind {
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    sarb $7, %cl
-; X86-NEXT:    addb %cl, %al
 ; X86-NEXT:    xorb %cl, %al
+; X86-NEXT:    subb %cl, %al
 ; X86-NEXT:    retl
   %r = call i8 @llvm.abs.i8(i8 %a, i1 false)
   ret i8 %r
@@ -197,8 +197,8 @@ define <2 x i32> @test_v2i32(<2 x i32> %a) nounwind {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    psrad $31, %xmm1
-; SSE-NEXT:    paddd %xmm1, %xmm0
 ; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    psubd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v2i32:
@@ -226,8 +226,8 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) nounwind {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    psrad $31, %xmm1
-; SSE-NEXT:    paddd %xmm1, %xmm0
 ; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    psubd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v3i32:
@@ -261,8 +261,8 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) nounwind {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movdqa %xmm0, %xmm1
 ; SSE-NEXT:    psrad $31, %xmm1
-; SSE-NEXT:    paddd %xmm1, %xmm0
 ; SSE-NEXT:    pxor %xmm1, %xmm0
+; SSE-NEXT:    psubd %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX-LABEL: test_v4i32:
@@ -309,12 +309,12 @@ define <8 x i32> @test_v8i32(<8 x i32> %a) nounwind {
 ; SSE:       # %bb.0:
 ; SSE-NEXT:    movdqa %xmm0, %xmm2
 ; SSE-NEXT:    psrad $31, %xmm2
-; SSE-NEXT:    paddd %xmm2, %xmm0
 ; SSE-NEXT:    pxor %xmm2, %xmm0
+; SSE-NEXT:    psubd %xmm2, %xmm0
 ; SSE-NEXT:    movdqa %xmm1, %xmm2
 ; SSE-NEXT:    psrad $31, %xmm2
-; SSE-NEXT:    paddd %xmm2, %xmm1
 ; SSE-NEXT:    pxor %xmm2, %xmm1
+; SSE-NEXT:    psubd %xmm2, %xmm1
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test_v8i32:
@@ -496,86 +496,86 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movb %cl, %al
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %cl
 ; X86-NEXT:    xorb %al, %cl
+; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    movb %dl, %al
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %dl
 ; X86-NEXT:    xorb %al, %dl
+; X86-NEXT:    subb %al, %dl
 ; X86-NEXT:    movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    movb %ah, %al
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %ah
 ; X86-NEXT:    xorb %al, %ah
+; X86-NEXT:    subb %al, %ah
 ; X86-NEXT:    movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    movb %ch, %al
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %ch
 ; X86-NEXT:    xorb %al, %ch
+; X86-NEXT:    subb %al, %ch
 ; X86-NEXT:    movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    movb %dh, %al
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %dh
 ; X86-NEXT:    xorb %al, %dh
+; X86-NEXT:    subb %al, %dh
 ; X86-NEXT:    movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    movl %ebx, %eax
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %bl
 ; X86-NEXT:    xorb %al, %bl
+; X86-NEXT:    subb %al, %bl
 ; X86-NEXT:    movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    movb %bh, %al
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %bh
 ; X86-NEXT:    xorb %al, %bh
+; X86-NEXT:    subb %al, %bh
 ; X86-NEXT:    movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %cl
 ; X86-NEXT:    xorb %al, %cl
+; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %cl
 ; X86-NEXT:    xorb %al, %cl
+; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %bh
 ; X86-NEXT:    movb %bh, %al
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %bh
 ; X86-NEXT:    xorb %al, %bh
+; X86-NEXT:    subb %al, %bh
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %bl
 ; X86-NEXT:    movl %ebx, %eax
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %bl
 ; X86-NEXT:    xorb %al, %bl
+; X86-NEXT:    subb %al, %bl
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %dh
 ; X86-NEXT:    movb %dh, %al
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %dh
 ; X86-NEXT:    xorb %al, %dh
+; X86-NEXT:    subb %al, %dh
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %ch
 ; X86-NEXT:    movb %ch, %al
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %ch
 ; X86-NEXT:    xorb %al, %ch
+; X86-NEXT:    subb %al, %ch
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %dl
 ; X86-NEXT:    movl %edx, %eax
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %dl
 ; X86-NEXT:    xorb %al, %dl
+; X86-NEXT:    subb %al, %dl
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    sarb $7, %al
-; X86-NEXT:    addb %al, %cl
 ; X86-NEXT:    xorb %al, %cl
+; X86-NEXT:    subb %al, %cl
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movb %al, %ah
 ; X86-NEXT:    sarb $7, %ah
-; X86-NEXT:    addb %ah, %al
 ; X86-NEXT:    xorb %ah, %al
+; X86-NEXT:    subb %ah, %al
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movb %al, 15(%esi)
 ; X86-NEXT:    movb %cl, 14(%esi)

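The SSE runs above check the same rewrite in vector form: psrad builds the per-lane sign mask, then pxor and psubd apply it. A short SSE2 intrinsics sketch of that sequence (illustrative, not from the patch; the function name is mine):

    #include <emmintrin.h>

    /* Per-lane |x| for 4 x i32 using only SSE2 */
    static __m128i abs_v4i32(__m128i x) {
      __m128i m = _mm_srai_epi32(x, 31); /* psrad $31: lane sign masks */
      x = _mm_xor_si128(x, m);           /* pxor */
      return _mm_sub_epi32(x, m);        /* psubd */
    }

The SSSE3 runs in these hunks are untouched because those targets can select pabsd directly.
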
diff  --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll
index fd5930217cb06..de20b4dccb20a 100644
--- a/llvm/test/CodeGen/X86/combine-abs.ll
+++ b/llvm/test/CodeGen/X86/combine-abs.ll
@@ -110,13 +110,13 @@ define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm2, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    psubq %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm2, %xmm1
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    psubq %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSE42-LABEL: combine_v4i64_abs_abs:

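For the 64-bit elements above there is no SSE2 arithmetic shift on i64 lanes, so the sign mask is produced by shifting the 32-bit lanes and then splatting each high dword with pshufd [1,1,3,3], exactly the sequence in the SSE2 checks. A sketch (illustrative, not from the patch; the function name is mine):

    #include <emmintrin.h>

    /* Per-lane |x| for 2 x i64 using only SSE2 (no 64-bit arithmetic shift) */
    static __m128i abs_v2i64(__m128i x) {
      __m128i m = _mm_srai_epi32(x, 31);                 /* psrad $31 on 32-bit lanes */
      m = _mm_shuffle_epi32(m, _MM_SHUFFLE(3, 3, 1, 1)); /* pshufd: dwords [1,1,3,3] */
      x = _mm_xor_si128(x, m);                           /* pxor */
      return _mm_sub_epi64(x, m);                        /* psubq */
    }
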
diff  --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll
index a00ec41516c83..1cbb8360440d9 100644
--- a/llvm/test/CodeGen/X86/iabs.ll
+++ b/llvm/test/CodeGen/X86/iabs.ll
@@ -15,17 +15,17 @@ define i8 @test_i8(i8 %a) nounwind {
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    sarb $7, %cl
-; X86-NEXT:    addb %cl, %al
 ; X86-NEXT:    xorb %cl, %al
+; X86-NEXT:    subb %cl, %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %eax, %ecx
 ; X64-NEXT:    sarb $7, %cl
-; X64-NEXT:    leal (%rdi,%rcx), %eax
 ; X64-NEXT:    xorb %cl, %al
+; X64-NEXT:    subb %cl, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %tmp1neg = sub i8 0, %a
@@ -40,8 +40,8 @@ define i16 @test_i16(i16 %a) nounwind {
 ; X86-NO-CMOV-NEXT:    movswl {{[0-9]+}}(%esp), %eax
 ; X86-NO-CMOV-NEXT:    movl %eax, %ecx
 ; X86-NO-CMOV-NEXT:    sarl $15, %ecx
-; X86-NO-CMOV-NEXT:    addl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    xorl %ecx, %eax
+; X86-NO-CMOV-NEXT:    subl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NO-CMOV-NEXT:    retl
 ;
@@ -71,8 +71,8 @@ define i32 @test_i32(i32 %a) nounwind {
 ; X86-NO-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NO-CMOV-NEXT:    movl %eax, %ecx
 ; X86-NO-CMOV-NEXT:    sarl $31, %ecx
-; X86-NO-CMOV-NEXT:    addl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    xorl %ecx, %eax
+; X86-NO-CMOV-NEXT:    subl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    retl
 ;
 ; X86-CMOV-LABEL: test_i32:

diff  --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll
index f34f683db078e..ee2564660a066 100644
--- a/llvm/test/CodeGen/X86/neg-abs.ll
+++ b/llvm/test/CodeGen/X86/neg-abs.ll
@@ -154,24 +154,21 @@ define i128 @neg_abs_i128(i128 %x) nounwind {
 define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind {
 ; X86-LABEL: sub_abs_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    sarb $7, %dl
-; X86-NEXT:    addb %dl, %cl
-; X86-NEXT:    xorb %dl, %cl
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    sarb $7, %al
+; X86-NEXT:    xorb %al, %cl
 ; X86-NEXT:    subb %cl, %al
+; X86-NEXT:    addb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sub_abs_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    movl %edi, %ecx
-; X64-NEXT:    sarb $7, %cl
-; X64-NEXT:    addb %cl, %dil
-; X64-NEXT:    xorb %cl, %dil
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    sarb $7, %al
+; X64-NEXT:    xorb %al, %dil
 ; X64-NEXT:    subb %dil, %al
-; X64-NEXT:    # kill: def $al killed $al killed $eax
+; X64-NEXT:    addb %sil, %al
 ; X64-NEXT:    retq
   %abs = tail call i8 @llvm.abs.i8(i8 %x, i1 false)
   %neg = sub nsw i8 %y, %abs
@@ -181,13 +178,12 @@ define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind {
 define i16 @sub_abs_i16(i16 %x, i16 %y) nounwind {
 ; X86-LABEL: sub_abs_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    sarl $15, %edx
-; X86-NEXT:    addl %edx, %ecx
-; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    sarl $15, %eax
+; X86-NEXT:    xorl %eax, %ecx
 ; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
@@ -207,13 +203,12 @@ define i16 @sub_abs_i16(i16 %x, i16 %y) nounwind {
 define i32 @sub_abs_i32(i32 %x, i32 %y) nounwind {
 ; X86-LABEL: sub_abs_i32:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    sarl $31, %edx
-; X86-NEXT:    addl %edx, %ecx
-; X86-NEXT:    xorl %edx, %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    sarl $31, %eax
+; X86-NEXT:    xorl %eax, %ecx
 ; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: sub_abs_i32:

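The sub_abs_* improvements above fall out of the negated form: the new code computes the sign mask, xors it into x, subtracts to form -|x|, and then folds the y operand in with a plain add. In C (sketch, same assumptions and hypothetical names as before):

    #include <stdint.h>

    /* y - |x| computed as y + (m - (x ^ m)), where m = x >> 31 */
    static int32_t sub_abs_i32(int32_t y, int32_t x) {
      int32_t m = x >> 31;
      uint32_t negabs = (uint32_t)m - ((uint32_t)x ^ (uint32_t)m); /* -|x| */
      return (int32_t)((uint32_t)y + negabs);
    }

This matches the sar/xor/sub/add sequences in the updated X86 and X64 checks, one instruction shorter than materializing |x| and then subtracting it.
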
diff  --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll
index 405d9eaa2c834..d892297d81ea6 100644
--- a/llvm/test/CodeGen/X86/viabs.ll
+++ b/llvm/test/CodeGen/X86/viabs.ll
@@ -12,8 +12,8 @@ define <4 x i32> @test_abs_gt_v4i32(<4 x i32> %a) nounwind {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrad $31, %xmm1
-; SSE2-NEXT:    paddd %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    psubd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_gt_v4i32:
@@ -51,8 +51,8 @@ define <4 x i32> @test_abs_ge_v4i32(<4 x i32> %a) nounwind {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrad $31, %xmm1
-; SSE2-NEXT:    paddd %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    psubd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_ge_v4i32:
@@ -176,8 +176,8 @@ define <4 x i32> @test_abs_le_v4i32(<4 x i32> %a) nounwind {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrad $31, %xmm1
-; SSE2-NEXT:    paddd %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    psubd %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_le_v4i32:
@@ -215,12 +215,12 @@ define <8 x i32> @test_abs_gt_v8i32(<8 x i32> %a) nounwind {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
-; SSE2-NEXT:    paddd %xmm2, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    psubd %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
-; SSE2-NEXT:    paddd %xmm2, %xmm1
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    psubd %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_gt_v8i32:
@@ -263,12 +263,12 @@ define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
-; SSE2-NEXT:    paddd %xmm2, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    psubd %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
-; SSE2-NEXT:    paddd %xmm2, %xmm1
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    psubd %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_ge_v8i32:
@@ -413,12 +413,12 @@ define <8 x i32> @test_abs_le_v8i32(<8 x i32> %a) nounwind {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
-; SSE2-NEXT:    paddd %xmm2, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    psubd %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
-; SSE2-NEXT:    paddd %xmm2, %xmm1
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    psubd %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_le_v8i32:
@@ -461,20 +461,20 @@ define <16 x i32> @test_abs_le_16i32(<16 x i32> %a) nounwind {
 ; SSE2:       # %bb.0:
 ; SSE2-NEXT:    movdqa %xmm0, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
-; SSE2-NEXT:    paddd %xmm4, %xmm0
 ; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    psubd %xmm4, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
-; SSE2-NEXT:    paddd %xmm4, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    psubd %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
-; SSE2-NEXT:    paddd %xmm4, %xmm2
 ; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    psubd %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
-; SSE2-NEXT:    paddd %xmm4, %xmm3
 ; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    psubd %xmm4, %xmm3
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_le_16i32:
@@ -527,8 +527,8 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm1
 ; SSE2-NEXT:    psrad $31, %xmm1
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm1, %xmm0
 ; SSE2-NEXT:    pxor %xmm1, %xmm0
+; SSE2-NEXT:    psubq %xmm1, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_ge_v2i64:
@@ -536,8 +536,8 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm1
 ; SSSE3-NEXT:    psrad $31, %xmm1
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm1, %xmm0
 ; SSSE3-NEXT:    pxor %xmm1, %xmm0
+; SSSE3-NEXT:    psubq %xmm1, %xmm0
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: test_abs_ge_v2i64:
@@ -577,13 +577,13 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm2, %xmm0
 ; SSE2-NEXT:    pxor %xmm2, %xmm0
+; SSE2-NEXT:    psubq %xmm2, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm2
 ; SSE2-NEXT:    psrad $31, %xmm2
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm2, %xmm1
 ; SSE2-NEXT:    pxor %xmm2, %xmm1
+; SSE2-NEXT:    psubq %xmm2, %xmm1
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_gt_v4i64:
@@ -591,13 +591,13 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm2
 ; SSSE3-NEXT:    psrad $31, %xmm2
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm2, %xmm0
 ; SSSE3-NEXT:    pxor %xmm2, %xmm0
+; SSSE3-NEXT:    psubq %xmm2, %xmm0
 ; SSSE3-NEXT:    movdqa %xmm1, %xmm2
 ; SSSE3-NEXT:    psrad $31, %xmm2
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm2, %xmm1
 ; SSSE3-NEXT:    pxor %xmm2, %xmm1
+; SSSE3-NEXT:    psubq %xmm2, %xmm1
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: test_abs_gt_v4i64:
@@ -646,23 +646,23 @@ define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm4, %xmm0
 ; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    psubq %xmm4, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm4, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    psubq %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm4, %xmm2
 ; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    psubq %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm4, %xmm3
 ; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    psubq %xmm4, %xmm3
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_le_v8i64:
@@ -670,23 +670,23 @@ define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind {
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm4
 ; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm4, %xmm0
 ; SSSE3-NEXT:    pxor %xmm4, %xmm0
+; SSSE3-NEXT:    psubq %xmm4, %xmm0
 ; SSSE3-NEXT:    movdqa %xmm1, %xmm4
 ; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm4, %xmm1
 ; SSSE3-NEXT:    pxor %xmm4, %xmm1
+; SSSE3-NEXT:    psubq %xmm4, %xmm1
 ; SSSE3-NEXT:    movdqa %xmm2, %xmm4
 ; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm4, %xmm2
 ; SSSE3-NEXT:    pxor %xmm4, %xmm2
+; SSSE3-NEXT:    psubq %xmm4, %xmm2
 ; SSSE3-NEXT:    movdqa %xmm3, %xmm4
 ; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm4, %xmm3
 ; SSSE3-NEXT:    pxor %xmm4, %xmm3
+; SSSE3-NEXT:    psubq %xmm4, %xmm3
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: test_abs_le_v8i64:
@@ -754,23 +754,23 @@ define <8 x i64> @test_abs_le_v8i64_fold(<8 x i64>* %a.ptr) nounwind {
 ; SSE2-NEXT:    movdqa %xmm0, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm4, %xmm0
 ; SSE2-NEXT:    pxor %xmm4, %xmm0
+; SSE2-NEXT:    psubq %xmm4, %xmm0
 ; SSE2-NEXT:    movdqa %xmm1, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm4, %xmm1
 ; SSE2-NEXT:    pxor %xmm4, %xmm1
+; SSE2-NEXT:    psubq %xmm4, %xmm1
 ; SSE2-NEXT:    movdqa %xmm2, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm4, %xmm2
 ; SSE2-NEXT:    pxor %xmm4, %xmm2
+; SSE2-NEXT:    psubq %xmm4, %xmm2
 ; SSE2-NEXT:    movdqa %xmm3, %xmm4
 ; SSE2-NEXT:    psrad $31, %xmm4
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT:    paddq %xmm4, %xmm3
 ; SSE2-NEXT:    pxor %xmm4, %xmm3
+; SSE2-NEXT:    psubq %xmm4, %xmm3
 ; SSE2-NEXT:    retq
 ;
 ; SSSE3-LABEL: test_abs_le_v8i64_fold:
@@ -782,23 +782,23 @@ define <8 x i64> @test_abs_le_v8i64_fold(<8 x i64>* %a.ptr) nounwind {
 ; SSSE3-NEXT:    movdqa %xmm0, %xmm4
 ; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm4, %xmm0
 ; SSSE3-NEXT:    pxor %xmm4, %xmm0
+; SSSE3-NEXT:    psubq %xmm4, %xmm0
 ; SSSE3-NEXT:    movdqa %xmm1, %xmm4
 ; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm4, %xmm1
 ; SSSE3-NEXT:    pxor %xmm4, %xmm1
+; SSSE3-NEXT:    psubq %xmm4, %xmm1
 ; SSSE3-NEXT:    movdqa %xmm2, %xmm4
 ; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm4, %xmm2
 ; SSSE3-NEXT:    pxor %xmm4, %xmm2
+; SSSE3-NEXT:    psubq %xmm4, %xmm2
 ; SSSE3-NEXT:    movdqa %xmm3, %xmm4
 ; SSSE3-NEXT:    psrad $31, %xmm4
 ; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT:    paddq %xmm4, %xmm3
 ; SSSE3-NEXT:    pxor %xmm4, %xmm3
+; SSSE3-NEXT:    psubq %xmm4, %xmm3
 ; SSSE3-NEXT:    retq
 ;
 ; SSE41-LABEL: test_abs_le_v8i64_fold:

diff  --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected
index 78abaf5168068..9ae01c167b8da 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected
@@ -9,17 +9,17 @@ define i8 @test_i8(i8 %a) nounwind {
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    sarb $7, %cl
-; X86-NEXT:    addb %cl, %al
 ; X86-NEXT:    xorb %cl, %al
+; X86-NEXT:    subb %cl, %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $edi killed $edi def $rdi
-; X64-NEXT:    movl %edi, %ecx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %eax, %ecx
 ; X64-NEXT:    sarb $7, %cl
-; X64-NEXT:    leal (%rdi,%rcx), %eax
 ; X64-NEXT:    xorb %cl, %al
+; X64-NEXT:    subb %cl, %al
 ; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %tmp1neg = sub i8 0, %a
@@ -34,8 +34,8 @@ define i16 @test_i16(i16 %a) nounwind {
 ; X86-NO-CMOV-NEXT:    movswl {{[0-9]+}}(%esp), %eax
 ; X86-NO-CMOV-NEXT:    movl %eax, %ecx
 ; X86-NO-CMOV-NEXT:    sarl $15, %ecx
-; X86-NO-CMOV-NEXT:    addl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    xorl %ecx, %eax
+; X86-NO-CMOV-NEXT:    subl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NO-CMOV-NEXT:    retl
 ;
@@ -65,8 +65,8 @@ define i32 @test_i32(i32 %a) nounwind {
 ; X86-NO-CMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NO-CMOV-NEXT:    movl %eax, %ecx
 ; X86-NO-CMOV-NEXT:    sarl $31, %ecx
-; X86-NO-CMOV-NEXT:    addl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    xorl %ecx, %eax
+; X86-NO-CMOV-NEXT:    subl %ecx, %eax
 ; X86-NO-CMOV-NEXT:    retl
 ;
 ; X86-CMOV-LABEL: test_i32:
