[llvm] 440c4b7 - [SelectionDAG][RISCV][ARM][PowerPC][X86][WebAssembly] Change default abs expansion to use sra (X, size(X)-1); sub (xor (X, Y), Y).
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 20 21:44:18 PST 2022
Author: Craig Topper
Date: 2022-02-20T21:11:23-08:00
New Revision: 440c4b705ad1d494a183b53cd65f21a481726157
URL: https://github.com/llvm/llvm-project/commit/440c4b705ad1d494a183b53cd65f21a481726157
DIFF: https://github.com/llvm/llvm-project/commit/440c4b705ad1d494a183b53cd65f21a481726157.diff
LOG: [SelectionDAG][RISCV][ARM][PowerPC][X86][WebAssembly] Change default abs expansion to use sra (X, size(X)-1); sub (xor (X, Y), Y).
Previously we used sra (X, size(X)-1); xor (add (X, Y), Y).
By placing sub at the end, we allow RISCV to combine sign_extend_inreg
with it to form subw.
Some X86 tests for Z - abs(X) seem to have improved as well.
Other targets look to be a wash.
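For anyone skimming, here is a minimal C sketch (not LLVM code, and assuming
the usual arithmetic right shift for signed values) of the two equivalent
expansions; the function names are made up for illustration:

  #include <stdint.h>

  // Old expansion: Y = sra (X, 31); xor (add (X, Y), Y)
  static int32_t abs_old(int32_t x) {
    int32_t y = x >> 31;   // 0 if x >= 0, -1 if x < 0
    return (x + y) ^ y;    // x < 0: (x - 1) ^ -1 == ~(x - 1) == -x
  }

  // New expansion: Y = sra (X, 31); sub (xor (X, Y), Y)
  static int32_t abs_new(int32_t x) {
    int32_t y = x >> 31;   // same sign mask
    return (x ^ y) - y;    // x < 0: ~x - (-1) == (-x - 1) + 1 == -x
  }

Both compute |x| (with the usual INT32_MIN caveat); ending on the sub is
what lets a trailing sign_extend_inreg fold into subw on RISCV.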
I had to modify ARM's abs matching code to match starting from the sub
instead of the xor. Maybe ISD::ABS should be made legal there instead; I'll
try that in parallel with this patch.
This is an alternative to D119099, which was focused on RISCV only.
Reviewed By: RKSimon
Differential Revision: https://reviews.llvm.org/D119171
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
llvm/test/CodeGen/RISCV/rv32zbb.ll
llvm/test/CodeGen/RISCV/rv64zbb.ll
llvm/test/CodeGen/Thumb/iabs.ll
llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll
llvm/test/CodeGen/Thumb2/abs.ll
llvm/test/CodeGen/WebAssembly/PR41149.ll
llvm/test/CodeGen/X86/abs.ll
llvm/test/CodeGen/X86/combine-abs.ll
llvm/test/CodeGen/X86/iabs.ll
llvm/test/CodeGen/X86/neg-abs.ll
llvm/test/CodeGen/X86/viabs.ll
llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0b69496d14f9f..6619f1c42a888 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -7443,13 +7443,13 @@ SDValue TargetLowering::expandABS(SDNode *N, SelectionDAG &DAG,
SDValue Shift =
DAG.getNode(ISD::SRA, dl, VT, Op,
DAG.getConstant(VT.getScalarSizeInBits() - 1, dl, ShVT));
- if (!IsNegative) {
- SDValue Add = DAG.getNode(ISD::ADD, dl, VT, Op, Shift);
- return DAG.getNode(ISD::XOR, dl, VT, Add, Shift);
- }
+ SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
+
+ // abs(x) -> Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
+ if (!IsNegative)
+ return DAG.getNode(ISD::SUB, dl, VT, Xor, Shift);
// 0 - abs(x) -> Y = sra (X, size(X)-1); sub (Y, xor (X, Y))
- SDValue Xor = DAG.getNode(ISD::XOR, dl, VT, Op, Shift);
return DAG.getNode(ISD::SUB, dl, VT, Shift, Xor);
}
diff --git a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 98c8133282a26..1735c0ddd11a5 100644
--- a/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -3464,40 +3464,39 @@ bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
return false;
}
-/// Target-specific DAG combining for ISD::XOR.
+/// Target-specific DAG combining for ISD::SUB.
/// Target-independent combining lowers SELECT_CC nodes of the form
/// select_cc setg[ge] X, 0, X, -X
/// select_cc setgt X, -1, X, -X
/// select_cc setl[te] X, 0, -X, X
/// select_cc setlt X, 1, -X, X
/// which represent Integer ABS into:
-/// Y = sra (X, size(X)-1); xor (add (X, Y), Y)
+/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
/// ARM instruction selection detects the latter and matches it to
/// ARM::ABS or ARM::t2ABS machine node.
bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
- SDValue XORSrc0 = N->getOperand(0);
- SDValue XORSrc1 = N->getOperand(1);
+ SDValue SUBSrc0 = N->getOperand(0);
+ SDValue SUBSrc1 = N->getOperand(1);
EVT VT = N->getValueType(0);
if (Subtarget->isThumb1Only())
return false;
- if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA)
+ if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
return false;
- SDValue ADDSrc0 = XORSrc0.getOperand(0);
- SDValue ADDSrc1 = XORSrc0.getOperand(1);
- SDValue SRASrc0 = XORSrc1.getOperand(0);
- SDValue SRASrc1 = XORSrc1.getOperand(1);
+ SDValue XORSrc0 = SUBSrc0.getOperand(0);
+ SDValue XORSrc1 = SUBSrc0.getOperand(1);
+ SDValue SRASrc0 = SUBSrc1.getOperand(0);
+ SDValue SRASrc1 = SUBSrc1.getOperand(1);
ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);
EVT XType = SRASrc0.getValueType();
unsigned Size = XType.getSizeInBits() - 1;
- if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 &&
- XType.isInteger() && SRAConstant != nullptr &&
- Size == SRAConstant->getZExtValue()) {
+ if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
+ SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
- CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0);
+ CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
return true;
}
@@ -3673,8 +3672,8 @@ void ARMDAGToDAGISel::Select(SDNode *N) {
if (tryInlineAsm(N))
return;
break;
- case ISD::XOR:
- // Select special operations if XOR node forms integer ABS pattern
+ case ISD::SUB:
+ // Select special operations if SUB node forms integer ABS pattern
if (tryABSOp(N))
return;
// Other cases are autogenerated.
diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
index 1dc8a7b99bc37..ec5e433b57cf7 100644
--- a/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
+++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-vabsd.ll
@@ -99,10 +99,10 @@ define <2 x i64> @sub_absv_64(<2 x i64> %a, <2 x i64> %b) local_unnamed_addr {
; CHECK-PWR7-NEXT: sub r4, r5, r6
; CHECK-PWR7-NEXT: sradi r5, r3, 63
; CHECK-PWR7-NEXT: sradi r6, r4, 63
-; CHECK-PWR7-NEXT: add r3, r3, r5
-; CHECK-PWR7-NEXT: add r4, r4, r6
; CHECK-PWR7-NEXT: xor r3, r3, r5
; CHECK-PWR7-NEXT: xor r4, r4, r6
+; CHECK-PWR7-NEXT: sub r3, r3, r5
+; CHECK-PWR7-NEXT: sub r4, r4, r6
; CHECK-PWR7-NEXT: std r3, -8(r1)
; CHECK-PWR7-NEXT: addi r3, r1, -16
; CHECK-PWR7-NEXT: std r4, -16(r1)
@@ -307,13 +307,13 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: sub r4, r7, r4
; CHECK-PWR9-LE-NEXT: srawi r6, r3, 31
; CHECK-PWR9-LE-NEXT: srawi r7, r4, 31
-; CHECK-PWR9-LE-NEXT: add r3, r3, r6
-; CHECK-PWR9-LE-NEXT: add r4, r4, r7
-; CHECK-PWR9-LE-NEXT: xor r6, r3, r6
-; CHECK-PWR9-LE-NEXT: srawi r3, r5, 31
+; CHECK-PWR9-LE-NEXT: xor r3, r3, r6
; CHECK-PWR9-LE-NEXT: xor r4, r4, r7
-; CHECK-PWR9-LE-NEXT: add r5, r5, r3
-; CHECK-PWR9-LE-NEXT: xor r3, r5, r3
+; CHECK-PWR9-LE-NEXT: sub r6, r3, r6
+; CHECK-PWR9-LE-NEXT: srawi r3, r5, 31
+; CHECK-PWR9-LE-NEXT: sub r4, r4, r7
+; CHECK-PWR9-LE-NEXT: xor r5, r5, r3
+; CHECK-PWR9-LE-NEXT: sub r3, r5, r3
; CHECK-PWR9-LE-NEXT: li r5, 3
; CHECK-PWR9-LE-NEXT: vextubrx r7, r5, v2
; CHECK-PWR9-LE-NEXT: vextubrx r5, r5, v3
@@ -321,8 +321,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r5, r5, 24
; CHECK-PWR9-LE-NEXT: sub r5, r7, r5
; CHECK-PWR9-LE-NEXT: srawi r7, r5, 31
-; CHECK-PWR9-LE-NEXT: add r5, r5, r7
; CHECK-PWR9-LE-NEXT: xor r5, r5, r7
+; CHECK-PWR9-LE-NEXT: sub r5, r5, r7
; CHECK-PWR9-LE-NEXT: li r7, 4
; CHECK-PWR9-LE-NEXT: vextubrx r8, r7, v2
; CHECK-PWR9-LE-NEXT: vextubrx r7, r7, v3
@@ -331,8 +331,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r7, r7, 24
; CHECK-PWR9-LE-NEXT: sub r7, r8, r7
; CHECK-PWR9-LE-NEXT: srawi r8, r7, 31
-; CHECK-PWR9-LE-NEXT: add r7, r7, r8
; CHECK-PWR9-LE-NEXT: xor r7, r7, r8
+; CHECK-PWR9-LE-NEXT: sub r7, r7, r8
; CHECK-PWR9-LE-NEXT: li r8, 5
; CHECK-PWR9-LE-NEXT: vextubrx r9, r8, v2
; CHECK-PWR9-LE-NEXT: vextubrx r8, r8, v3
@@ -340,8 +340,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r8, r8, 24
; CHECK-PWR9-LE-NEXT: sub r8, r9, r8
; CHECK-PWR9-LE-NEXT: srawi r9, r8, 31
-; CHECK-PWR9-LE-NEXT: add r8, r8, r9
; CHECK-PWR9-LE-NEXT: xor r8, r8, r9
+; CHECK-PWR9-LE-NEXT: sub r8, r8, r9
; CHECK-PWR9-LE-NEXT: li r9, 6
; CHECK-PWR9-LE-NEXT: vextubrx r10, r9, v2
; CHECK-PWR9-LE-NEXT: vextubrx r9, r9, v3
@@ -349,8 +349,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r9, r9, 24
; CHECK-PWR9-LE-NEXT: sub r9, r10, r9
; CHECK-PWR9-LE-NEXT: srawi r10, r9, 31
-; CHECK-PWR9-LE-NEXT: add r9, r9, r10
; CHECK-PWR9-LE-NEXT: xor r9, r9, r10
+; CHECK-PWR9-LE-NEXT: sub r9, r9, r10
; CHECK-PWR9-LE-NEXT: li r10, 7
; CHECK-PWR9-LE-NEXT: vextubrx r11, r10, v2
; CHECK-PWR9-LE-NEXT: vextubrx r10, r10, v3
@@ -358,8 +358,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r10, r10, 24
; CHECK-PWR9-LE-NEXT: sub r10, r11, r10
; CHECK-PWR9-LE-NEXT: srawi r11, r10, 31
-; CHECK-PWR9-LE-NEXT: add r10, r10, r11
; CHECK-PWR9-LE-NEXT: xor r10, r10, r11
+; CHECK-PWR9-LE-NEXT: sub r10, r10, r11
; CHECK-PWR9-LE-NEXT: li r11, 8
; CHECK-PWR9-LE-NEXT: vextubrx r12, r11, v2
; CHECK-PWR9-LE-NEXT: vextubrx r11, r11, v3
@@ -368,8 +368,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r11, r11, 24
; CHECK-PWR9-LE-NEXT: sub r11, r12, r11
; CHECK-PWR9-LE-NEXT: srawi r12, r11, 31
-; CHECK-PWR9-LE-NEXT: add r11, r11, r12
; CHECK-PWR9-LE-NEXT: xor r11, r11, r12
+; CHECK-PWR9-LE-NEXT: sub r11, r11, r12
; CHECK-PWR9-LE-NEXT: li r12, 9
; CHECK-PWR9-LE-NEXT: vextubrx r0, r12, v2
; CHECK-PWR9-LE-NEXT: vextubrx r12, r12, v3
@@ -377,8 +377,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r12, r12, 24
; CHECK-PWR9-LE-NEXT: sub r12, r0, r12
; CHECK-PWR9-LE-NEXT: srawi r0, r12, 31
-; CHECK-PWR9-LE-NEXT: add r12, r12, r0
; CHECK-PWR9-LE-NEXT: xor r12, r12, r0
+; CHECK-PWR9-LE-NEXT: sub r12, r12, r0
; CHECK-PWR9-LE-NEXT: li r0, 10
; CHECK-PWR9-LE-NEXT: vextubrx r30, r0, v2
; CHECK-PWR9-LE-NEXT: vextubrx r0, r0, v3
@@ -386,8 +386,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r0, r0, 24
; CHECK-PWR9-LE-NEXT: sub r0, r30, r0
; CHECK-PWR9-LE-NEXT: srawi r30, r0, 31
-; CHECK-PWR9-LE-NEXT: add r0, r0, r30
; CHECK-PWR9-LE-NEXT: xor r0, r0, r30
+; CHECK-PWR9-LE-NEXT: sub r0, r0, r30
; CHECK-PWR9-LE-NEXT: li r30, 11
; CHECK-PWR9-LE-NEXT: vextubrx r29, r30, v2
; CHECK-PWR9-LE-NEXT: vextubrx r30, r30, v3
@@ -395,8 +395,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r30, r30, 24
; CHECK-PWR9-LE-NEXT: sub r30, r29, r30
; CHECK-PWR9-LE-NEXT: srawi r29, r30, 31
-; CHECK-PWR9-LE-NEXT: add r30, r30, r29
; CHECK-PWR9-LE-NEXT: xor r30, r30, r29
+; CHECK-PWR9-LE-NEXT: sub r30, r30, r29
; CHECK-PWR9-LE-NEXT: li r29, 12
; CHECK-PWR9-LE-NEXT: vextubrx r28, r29, v2
; CHECK-PWR9-LE-NEXT: vextubrx r29, r29, v3
@@ -404,8 +404,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r29, r29, 24
; CHECK-PWR9-LE-NEXT: sub r29, r28, r29
; CHECK-PWR9-LE-NEXT: srawi r28, r29, 31
-; CHECK-PWR9-LE-NEXT: add r29, r29, r28
; CHECK-PWR9-LE-NEXT: xor r29, r29, r28
+; CHECK-PWR9-LE-NEXT: sub r29, r29, r28
; CHECK-PWR9-LE-NEXT: li r28, 13
; CHECK-PWR9-LE-NEXT: vextubrx r27, r28, v2
; CHECK-PWR9-LE-NEXT: vextubrx r28, r28, v3
@@ -413,8 +413,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r28, r28, 24
; CHECK-PWR9-LE-NEXT: sub r28, r27, r28
; CHECK-PWR9-LE-NEXT: srawi r27, r28, 31
-; CHECK-PWR9-LE-NEXT: add r28, r28, r27
; CHECK-PWR9-LE-NEXT: xor r28, r28, r27
+; CHECK-PWR9-LE-NEXT: sub r28, r28, r27
; CHECK-PWR9-LE-NEXT: li r27, 14
; CHECK-PWR9-LE-NEXT: vextubrx r26, r27, v2
; CHECK-PWR9-LE-NEXT: vextubrx r27, r27, v3
@@ -422,8 +422,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: clrlwi r27, r27, 24
; CHECK-PWR9-LE-NEXT: sub r27, r26, r27
; CHECK-PWR9-LE-NEXT: srawi r26, r27, 31
-; CHECK-PWR9-LE-NEXT: add r27, r27, r26
; CHECK-PWR9-LE-NEXT: xor r27, r27, r26
+; CHECK-PWR9-LE-NEXT: sub r27, r27, r26
; CHECK-PWR9-LE-NEXT: li r26, 15
; CHECK-PWR9-LE-NEXT: vextubrx r25, r26, v2
; CHECK-PWR9-LE-NEXT: vextubrx r26, r26, v3
@@ -441,10 +441,10 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-LE-NEXT: srawi r25, r26, 31
; CHECK-PWR9-LE-NEXT: vmrghb v3, v4, v3
; CHECK-PWR9-LE-NEXT: mtvsrd v4, r9
-; CHECK-PWR9-LE-NEXT: add r26, r26, r25
+; CHECK-PWR9-LE-NEXT: xor r26, r26, r25
; CHECK-PWR9-LE-NEXT: vmrghb v4, v5, v4
; CHECK-PWR9-LE-NEXT: mtvsrd v5, r30
-; CHECK-PWR9-LE-NEXT: xor r26, r26, r25
+; CHECK-PWR9-LE-NEXT: sub r26, r26, r25
; CHECK-PWR9-LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-PWR9-LE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
; CHECK-PWR9-LE-NEXT: mtvsrd v0, r26
@@ -499,13 +499,13 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: srawi r6, r3, 31
; CHECK-PWR9-BE-NEXT: srawi r7, r4, 31
; CHECK-PWR9-BE-NEXT: srawi r8, r5, 31
-; CHECK-PWR9-BE-NEXT: add r3, r3, r6
-; CHECK-PWR9-BE-NEXT: add r4, r4, r7
-; CHECK-PWR9-BE-NEXT: add r5, r5, r8
; CHECK-PWR9-BE-NEXT: xor r3, r3, r6
-; CHECK-PWR9-BE-NEXT: li r6, 3
; CHECK-PWR9-BE-NEXT: xor r4, r4, r7
; CHECK-PWR9-BE-NEXT: xor r5, r5, r8
+; CHECK-PWR9-BE-NEXT: sub r3, r3, r6
+; CHECK-PWR9-BE-NEXT: li r6, 3
+; CHECK-PWR9-BE-NEXT: sub r4, r4, r7
+; CHECK-PWR9-BE-NEXT: sub r5, r5, r8
; CHECK-PWR9-BE-NEXT: vextublx r7, r6, v2
; CHECK-PWR9-BE-NEXT: vextublx r6, r6, v3
; CHECK-PWR9-BE-NEXT: mtvsrwz v1, r3
@@ -513,8 +513,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r6, r6, 24
; CHECK-PWR9-BE-NEXT: sub r6, r7, r6
; CHECK-PWR9-BE-NEXT: srawi r7, r6, 31
-; CHECK-PWR9-BE-NEXT: add r6, r6, r7
; CHECK-PWR9-BE-NEXT: xor r6, r6, r7
+; CHECK-PWR9-BE-NEXT: sub r6, r6, r7
; CHECK-PWR9-BE-NEXT: li r7, 4
; CHECK-PWR9-BE-NEXT: vextublx r8, r7, v2
; CHECK-PWR9-BE-NEXT: vextublx r7, r7, v3
@@ -522,8 +522,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r7, r7, 24
; CHECK-PWR9-BE-NEXT: sub r7, r8, r7
; CHECK-PWR9-BE-NEXT: srawi r8, r7, 31
-; CHECK-PWR9-BE-NEXT: add r7, r7, r8
; CHECK-PWR9-BE-NEXT: xor r7, r7, r8
+; CHECK-PWR9-BE-NEXT: sub r7, r7, r8
; CHECK-PWR9-BE-NEXT: li r8, 5
; CHECK-PWR9-BE-NEXT: vextublx r9, r8, v2
; CHECK-PWR9-BE-NEXT: vextublx r8, r8, v3
@@ -531,8 +531,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r8, r8, 24
; CHECK-PWR9-BE-NEXT: sub r8, r9, r8
; CHECK-PWR9-BE-NEXT: srawi r9, r8, 31
-; CHECK-PWR9-BE-NEXT: add r8, r8, r9
; CHECK-PWR9-BE-NEXT: xor r8, r8, r9
+; CHECK-PWR9-BE-NEXT: sub r8, r8, r9
; CHECK-PWR9-BE-NEXT: li r9, 6
; CHECK-PWR9-BE-NEXT: vextublx r10, r9, v2
; CHECK-PWR9-BE-NEXT: vextublx r9, r9, v3
@@ -540,8 +540,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r9, r9, 24
; CHECK-PWR9-BE-NEXT: sub r9, r10, r9
; CHECK-PWR9-BE-NEXT: srawi r10, r9, 31
-; CHECK-PWR9-BE-NEXT: add r9, r9, r10
; CHECK-PWR9-BE-NEXT: xor r9, r9, r10
+; CHECK-PWR9-BE-NEXT: sub r9, r9, r10
; CHECK-PWR9-BE-NEXT: li r10, 7
; CHECK-PWR9-BE-NEXT: vextublx r11, r10, v2
; CHECK-PWR9-BE-NEXT: vextublx r10, r10, v3
@@ -549,8 +549,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r10, r10, 24
; CHECK-PWR9-BE-NEXT: sub r10, r11, r10
; CHECK-PWR9-BE-NEXT: srawi r11, r10, 31
-; CHECK-PWR9-BE-NEXT: add r10, r10, r11
; CHECK-PWR9-BE-NEXT: xor r10, r10, r11
+; CHECK-PWR9-BE-NEXT: sub r10, r10, r11
; CHECK-PWR9-BE-NEXT: li r11, 8
; CHECK-PWR9-BE-NEXT: vextublx r12, r11, v2
; CHECK-PWR9-BE-NEXT: vextublx r11, r11, v3
@@ -558,8 +558,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r11, r11, 24
; CHECK-PWR9-BE-NEXT: sub r11, r12, r11
; CHECK-PWR9-BE-NEXT: srawi r12, r11, 31
-; CHECK-PWR9-BE-NEXT: add r11, r11, r12
; CHECK-PWR9-BE-NEXT: xor r11, r11, r12
+; CHECK-PWR9-BE-NEXT: sub r11, r11, r12
; CHECK-PWR9-BE-NEXT: li r12, 9
; CHECK-PWR9-BE-NEXT: vextublx r0, r12, v2
; CHECK-PWR9-BE-NEXT: vextublx r12, r12, v3
@@ -568,8 +568,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r12, r12, 24
; CHECK-PWR9-BE-NEXT: sub r12, r0, r12
; CHECK-PWR9-BE-NEXT: srawi r0, r12, 31
-; CHECK-PWR9-BE-NEXT: add r12, r12, r0
; CHECK-PWR9-BE-NEXT: xor r12, r12, r0
+; CHECK-PWR9-BE-NEXT: sub r12, r12, r0
; CHECK-PWR9-BE-NEXT: li r0, 10
; CHECK-PWR9-BE-NEXT: vextublx r30, r0, v2
; CHECK-PWR9-BE-NEXT: vextublx r0, r0, v3
@@ -577,8 +577,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r0, r0, 24
; CHECK-PWR9-BE-NEXT: sub r0, r30, r0
; CHECK-PWR9-BE-NEXT: srawi r30, r0, 31
-; CHECK-PWR9-BE-NEXT: add r0, r0, r30
; CHECK-PWR9-BE-NEXT: xor r0, r0, r30
+; CHECK-PWR9-BE-NEXT: sub r0, r0, r30
; CHECK-PWR9-BE-NEXT: li r30, 11
; CHECK-PWR9-BE-NEXT: vextublx r29, r30, v2
; CHECK-PWR9-BE-NEXT: vextublx r30, r30, v3
@@ -586,8 +586,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r30, r30, 24
; CHECK-PWR9-BE-NEXT: sub r30, r29, r30
; CHECK-PWR9-BE-NEXT: srawi r29, r30, 31
-; CHECK-PWR9-BE-NEXT: add r30, r30, r29
; CHECK-PWR9-BE-NEXT: xor r30, r30, r29
+; CHECK-PWR9-BE-NEXT: sub r30, r30, r29
; CHECK-PWR9-BE-NEXT: li r29, 12
; CHECK-PWR9-BE-NEXT: vextublx r28, r29, v2
; CHECK-PWR9-BE-NEXT: vextublx r29, r29, v3
@@ -595,8 +595,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r29, r29, 24
; CHECK-PWR9-BE-NEXT: sub r29, r28, r29
; CHECK-PWR9-BE-NEXT: srawi r28, r29, 31
-; CHECK-PWR9-BE-NEXT: add r29, r29, r28
; CHECK-PWR9-BE-NEXT: xor r29, r29, r28
+; CHECK-PWR9-BE-NEXT: sub r29, r29, r28
; CHECK-PWR9-BE-NEXT: li r28, 13
; CHECK-PWR9-BE-NEXT: vextublx r27, r28, v2
; CHECK-PWR9-BE-NEXT: vextublx r28, r28, v3
@@ -606,8 +606,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r28, r28, 24
; CHECK-PWR9-BE-NEXT: sub r28, r27, r28
; CHECK-PWR9-BE-NEXT: srawi r27, r28, 31
-; CHECK-PWR9-BE-NEXT: add r28, r28, r27
; CHECK-PWR9-BE-NEXT: xor r28, r28, r27
+; CHECK-PWR9-BE-NEXT: sub r28, r28, r27
; CHECK-PWR9-BE-NEXT: li r27, 14
; CHECK-PWR9-BE-NEXT: vextublx r26, r27, v2
; CHECK-PWR9-BE-NEXT: vextublx r27, r27, v3
@@ -615,8 +615,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: clrlwi r27, r27, 24
; CHECK-PWR9-BE-NEXT: sub r27, r26, r27
; CHECK-PWR9-BE-NEXT: srawi r26, r27, 31
-; CHECK-PWR9-BE-NEXT: add r27, r27, r26
; CHECK-PWR9-BE-NEXT: xor r27, r27, r26
+; CHECK-PWR9-BE-NEXT: sub r27, r27, r26
; CHECK-PWR9-BE-NEXT: li r26, 15
; CHECK-PWR9-BE-NEXT: vextublx r25, r26, v2
; CHECK-PWR9-BE-NEXT: vextublx r26, r26, v3
@@ -629,8 +629,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR9-BE-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
; CHECK-PWR9-BE-NEXT: sub r26, r25, r26
; CHECK-PWR9-BE-NEXT: srawi r25, r26, 31
-; CHECK-PWR9-BE-NEXT: add r26, r26, r25
; CHECK-PWR9-BE-NEXT: xor r26, r26, r25
+; CHECK-PWR9-BE-NEXT: sub r26, r26, r25
; CHECK-PWR9-BE-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
; CHECK-PWR9-BE-NEXT: mtvsrwz v2, r26
; CHECK-PWR9-BE-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
@@ -707,25 +707,25 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR8-NEXT: srawi r12, r7, 31
; CHECK-PWR8-NEXT: clrlwi r10, r0, 24
; CHECK-PWR8-NEXT: clrlwi r0, r30, 24
-; CHECK-PWR8-NEXT: add r4, r4, r3
-; CHECK-PWR8-NEXT: add r7, r7, r12
+; CHECK-PWR8-NEXT: xor r4, r4, r3
+; CHECK-PWR8-NEXT: xor r7, r7, r12
; CHECK-PWR8-NEXT: sub r10, r10, r0
; CHECK-PWR8-NEXT: std r20, -96(r1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: std r21, -88(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: xor r3, r4, r3
+; CHECK-PWR8-NEXT: sub r3, r4, r3
; CHECK-PWR8-NEXT: srawi r4, r9, 31
-; CHECK-PWR8-NEXT: xor r7, r7, r12
+; CHECK-PWR8-NEXT: sub r7, r7, r12
; CHECK-PWR8-NEXT: std r22, -80(r1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: rldicl r29, r5, 24, 56
; CHECK-PWR8-NEXT: rldicl r28, r6, 24, 56
-; CHECK-PWR8-NEXT: add r9, r9, r4
+; CHECK-PWR8-NEXT: xor r9, r9, r4
; CHECK-PWR8-NEXT: mtvsrd v3, r7
; CHECK-PWR8-NEXT: rldicl r27, r5, 16, 56
; CHECK-PWR8-NEXT: rldicl r25, r6, 16, 56
; CHECK-PWR8-NEXT: clrlwi r30, r29, 24
; CHECK-PWR8-NEXT: clrlwi r29, r28, 24
; CHECK-PWR8-NEXT: mtvsrd v2, r3
-; CHECK-PWR8-NEXT: xor r4, r9, r4
+; CHECK-PWR8-NEXT: sub r4, r9, r4
; CHECK-PWR8-NEXT: srawi r7, r10, 31
; CHECK-PWR8-NEXT: srawi r3, r11, 31
; CHECK-PWR8-NEXT: clrlwi r9, r27, 24
@@ -733,15 +733,15 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR8-NEXT: sub r0, r30, r29
; CHECK-PWR8-NEXT: mtvsrd v4, r4
; CHECK-PWR8-NEXT: std r23, -72(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: add r10, r10, r7
-; CHECK-PWR8-NEXT: add r11, r11, r3
+; CHECK-PWR8-NEXT: xor r10, r10, r7
+; CHECK-PWR8-NEXT: xor r11, r11, r3
; CHECK-PWR8-NEXT: sub r9, r9, r12
; CHECK-PWR8-NEXT: std r18, -112(r1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: std r19, -104(r1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: vmrghb v2, v3, v2
-; CHECK-PWR8-NEXT: xor r7, r10, r7
+; CHECK-PWR8-NEXT: sub r7, r10, r7
; CHECK-PWR8-NEXT: rldicl r5, r5, 8, 56
-; CHECK-PWR8-NEXT: xor r3, r11, r3
+; CHECK-PWR8-NEXT: sub r3, r11, r3
; CHECK-PWR8-NEXT: rldicl r6, r6, 8, 56
; CHECK-PWR8-NEXT: srawi r4, r0, 31
; CHECK-PWR8-NEXT: mtvsrd v0, r7
@@ -754,13 +754,13 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR8-NEXT: clrlwi r5, r6, 24
; CHECK-PWR8-NEXT: clrldi r22, r24, 56
; CHECK-PWR8-NEXT: rldicl r21, r26, 56, 56
-; CHECK-PWR8-NEXT: add r10, r0, r4
-; CHECK-PWR8-NEXT: add r9, r9, r7
+; CHECK-PWR8-NEXT: xor r10, r0, r4
+; CHECK-PWR8-NEXT: xor r9, r9, r7
; CHECK-PWR8-NEXT: rldicl r20, r24, 56, 56
; CHECK-PWR8-NEXT: rldicl r19, r26, 48, 56
; CHECK-PWR8-NEXT: sub r3, r3, r5
-; CHECK-PWR8-NEXT: xor r4, r10, r4
-; CHECK-PWR8-NEXT: xor r7, r9, r7
+; CHECK-PWR8-NEXT: sub r4, r10, r4
+; CHECK-PWR8-NEXT: sub r7, r9, r7
; CHECK-PWR8-NEXT: clrlwi r9, r23, 24
; CHECK-PWR8-NEXT: rldicl r18, r24, 48, 56
; CHECK-PWR8-NEXT: clrlwi r10, r22, 24
@@ -779,7 +779,7 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR8-NEXT: clrlwi r12, r18, 24
; CHECK-PWR8-NEXT: vmrghb v4, v5, v4
; CHECK-PWR8-NEXT: std r31, -8(r1) # 8-byte Folded Spill
-; CHECK-PWR8-NEXT: add r3, r3, r4
+; CHECK-PWR8-NEXT: xor r3, r3, r4
; CHECK-PWR8-NEXT: sub r7, r11, r12
; CHECK-PWR8-NEXT: clrlwi r11, r17, 24
; CHECK-PWR8-NEXT: clrlwi r12, r16, 24
@@ -787,7 +787,7 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR8-NEXT: std r2, -152(r1) # 8-byte Folded Spill
; CHECK-PWR8-NEXT: rldicl r15, r26, 32, 56
; CHECK-PWR8-NEXT: rldicl r14, r24, 32, 56
-; CHECK-PWR8-NEXT: xor r3, r3, r4
+; CHECK-PWR8-NEXT: sub r3, r3, r4
; CHECK-PWR8-NEXT: sub r11, r11, r12
; CHECK-PWR8-NEXT: srawi r4, r9, 31
; CHECK-PWR8-NEXT: srawi r12, r10, 31
@@ -795,40 +795,40 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR8-NEXT: clrlwi r30, r14, 24
; CHECK-PWR8-NEXT: mtvsrd v5, r3
; CHECK-PWR8-NEXT: ld r27, -40(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT: add r9, r9, r4
-; CHECK-PWR8-NEXT: add r10, r10, r12
+; CHECK-PWR8-NEXT: xor r9, r9, r4
+; CHECK-PWR8-NEXT: xor r10, r10, r12
; CHECK-PWR8-NEXT: sub r3, r0, r30
; CHECK-PWR8-NEXT: ld r25, -56(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: ld r23, -72(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: ld r22, -80(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: srawi r28, r11, 31
-; CHECK-PWR8-NEXT: xor r4, r9, r4
-; CHECK-PWR8-NEXT: xor r10, r10, r12
+; CHECK-PWR8-NEXT: sub r4, r9, r4
+; CHECK-PWR8-NEXT: sub r10, r10, r12
; CHECK-PWR8-NEXT: vmrghb v3, v5, v3
; CHECK-PWR8-NEXT: ld r21, -88(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: ld r20, -96(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: srawi r29, r7, 31
; CHECK-PWR8-NEXT: srawi r9, r3, 31
; CHECK-PWR8-NEXT: mtvsrd v5, r4
-; CHECK-PWR8-NEXT: add r4, r11, r28
+; CHECK-PWR8-NEXT: xor r4, r11, r28
; CHECK-PWR8-NEXT: ld r19, -104(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: ld r18, -112(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: mtvsrd v1, r10
; CHECK-PWR8-NEXT: ld r10, -160(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: rldicl r31, r26, 24, 56
; CHECK-PWR8-NEXT: rldicl r2, r24, 24, 56
-; CHECK-PWR8-NEXT: add r7, r7, r29
-; CHECK-PWR8-NEXT: add r3, r3, r9
+; CHECK-PWR8-NEXT: xor r7, r7, r29
+; CHECK-PWR8-NEXT: xor r3, r3, r9
; CHECK-PWR8-NEXT: rldicl r8, r24, 16, 56
; CHECK-PWR8-NEXT: rldicl r6, r26, 8, 56
-; CHECK-PWR8-NEXT: xor r4, r4, r28
+; CHECK-PWR8-NEXT: sub r4, r4, r28
; CHECK-PWR8-NEXT: clrlwi r0, r31, 24
; CHECK-PWR8-NEXT: clrlwi r30, r2, 24
-; CHECK-PWR8-NEXT: xor r7, r7, r29
+; CHECK-PWR8-NEXT: sub r7, r7, r29
; CHECK-PWR8-NEXT: rldicl r5, r24, 8, 56
; CHECK-PWR8-NEXT: clrlwi r10, r10, 24
; CHECK-PWR8-NEXT: clrlwi r8, r8, 24
-; CHECK-PWR8-NEXT: xor r3, r3, r9
+; CHECK-PWR8-NEXT: sub r3, r3, r9
; CHECK-PWR8-NEXT: mtvsrd v7, r4
; CHECK-PWR8-NEXT: clrlwi r4, r6, 24
; CHECK-PWR8-NEXT: clrlwi r5, r5, 24
@@ -845,18 +845,18 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR8-NEXT: ld r28, -32(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: srawi r6, r7, 31
; CHECK-PWR8-NEXT: srawi r5, r3, 31
-; CHECK-PWR8-NEXT: add r8, r0, r12
+; CHECK-PWR8-NEXT: xor r8, r0, r12
; CHECK-PWR8-NEXT: vmrghb v5, v1, v5
; CHECK-PWR8-NEXT: ld r26, -48(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: ld r24, -64(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT: add r4, r7, r6
-; CHECK-PWR8-NEXT: add r3, r3, r5
-; CHECK-PWR8-NEXT: xor r8, r8, r12
+; CHECK-PWR8-NEXT: xor r4, r7, r6
+; CHECK-PWR8-NEXT: xor r3, r3, r5
+; CHECK-PWR8-NEXT: sub r8, r8, r12
; CHECK-PWR8-NEXT: vmrghb v6, v7, v6
; CHECK-PWR8-NEXT: ld r17, -120(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: ld r16, -128(r1) # 8-byte Folded Reload
-; CHECK-PWR8-NEXT: xor r4, r4, r6
-; CHECK-PWR8-NEXT: xor r3, r3, r5
+; CHECK-PWR8-NEXT: sub r4, r4, r6
+; CHECK-PWR8-NEXT: sub r3, r3, r5
; CHECK-PWR8-NEXT: mtvsrd v9, r8
; CHECK-PWR8-NEXT: ld r15, -136(r1) # 8-byte Folded Reload
; CHECK-PWR8-NEXT: ld r14, -144(r1) # 8-byte Folded Reload
@@ -875,15 +875,8 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
;
; CHECK-PWR7-LABEL: sub_absv_8_ext:
; CHECK-PWR7: # %bb.0: # %entry
-; CHECK-PWR7-NEXT: stdu r1, -464(r1)
-; CHECK-PWR7-NEXT: .cfi_def_cfa_offset 464
-; CHECK-PWR7-NEXT: .cfi_offset r16, -128
-; CHECK-PWR7-NEXT: .cfi_offset r17, -120
-; CHECK-PWR7-NEXT: .cfi_offset r18, -112
-; CHECK-PWR7-NEXT: .cfi_offset r19, -104
-; CHECK-PWR7-NEXT: .cfi_offset r20, -96
-; CHECK-PWR7-NEXT: .cfi_offset r21, -88
-; CHECK-PWR7-NEXT: .cfi_offset r22, -80
+; CHECK-PWR7-NEXT: stdu r1, -416(r1)
+; CHECK-PWR7-NEXT: .cfi_def_cfa_offset 416
; CHECK-PWR7-NEXT: .cfi_offset r23, -72
; CHECK-PWR7-NEXT: .cfi_offset r24, -64
; CHECK-PWR7-NEXT: .cfi_offset r25, -56
@@ -893,167 +886,156 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR7-NEXT: .cfi_offset r29, -24
; CHECK-PWR7-NEXT: .cfi_offset r30, -16
; CHECK-PWR7-NEXT: addi r3, r1, 304
-; CHECK-PWR7-NEXT: std r16, 336(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT: std r23, 344(r1) # 8-byte Folded Spill
; CHECK-PWR7-NEXT: addi r4, r1, 320
-; CHECK-PWR7-NEXT: std r17, 344(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r18, 352(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r19, 360(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r20, 368(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r21, 376(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r22, 384(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r23, 392(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r24, 400(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r25, 408(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r26, 416(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r27, 424(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r28, 432(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r29, 440(r1) # 8-byte Folded Spill
-; CHECK-PWR7-NEXT: std r30, 448(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT: std r24, 352(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT: std r25, 360(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT: std r26, 368(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT: std r27, 376(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT: std r28, 384(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT: std r29, 392(r1) # 8-byte Folded Spill
+; CHECK-PWR7-NEXT: std r30, 400(r1) # 8-byte Folded Spill
; CHECK-PWR7-NEXT: stxvw4x v2, 0, r3
; CHECK-PWR7-NEXT: lbz r3, 304(r1)
; CHECK-PWR7-NEXT: stxvw4x v3, 0, r4
-; CHECK-PWR7-NEXT: lbz r4, 320(r1)
-; CHECK-PWR7-NEXT: lbz r5, 305(r1)
-; CHECK-PWR7-NEXT: lbz r6, 321(r1)
-; CHECK-PWR7-NEXT: lbz r7, 306(r1)
-; CHECK-PWR7-NEXT: lbz r8, 322(r1)
; CHECK-PWR7-NEXT: lbz r9, 307(r1)
-; CHECK-PWR7-NEXT: sub r3, r3, r4
; CHECK-PWR7-NEXT: lbz r10, 323(r1)
; CHECK-PWR7-NEXT: lbz r11, 308(r1)
-; CHECK-PWR7-NEXT: sub r5, r5, r6
; CHECK-PWR7-NEXT: lbz r12, 324(r1)
; CHECK-PWR7-NEXT: lbz r0, 309(r1)
-; CHECK-PWR7-NEXT: sub r6, r7, r8
; CHECK-PWR7-NEXT: lbz r30, 325(r1)
-; CHECK-PWR7-NEXT: lbz r29, 310(r1)
; CHECK-PWR7-NEXT: sub r9, r9, r10
+; CHECK-PWR7-NEXT: lbz r29, 310(r1)
; CHECK-PWR7-NEXT: lbz r28, 326(r1)
-; CHECK-PWR7-NEXT: lbz r23, 313(r1)
-; CHECK-PWR7-NEXT: sub r10, r11, r12
-; CHECK-PWR7-NEXT: lbz r22, 329(r1)
-; CHECK-PWR7-NEXT: lbz r4, 314(r1)
-; CHECK-PWR7-NEXT: sub r0, r0, r30
-; CHECK-PWR7-NEXT: lbz r21, 330(r1)
-; CHECK-PWR7-NEXT: lbz r7, 315(r1)
-; CHECK-PWR7-NEXT: sub r30, r29, r28
-; CHECK-PWR7-NEXT: srawi r20, r0, 31
-; CHECK-PWR7-NEXT: lbz r8, 331(r1)
-; CHECK-PWR7-NEXT: lbz r11, 316(r1)
-; CHECK-PWR7-NEXT: sub r23, r23, r22
-; CHECK-PWR7-NEXT: srawi r19, r30, 31
-; CHECK-PWR7-NEXT: lbz r12, 332(r1)
-; CHECK-PWR7-NEXT: lbz r29, 317(r1)
-; CHECK-PWR7-NEXT: sub r4, r4, r21
-; CHECK-PWR7-NEXT: add r0, r0, r20
-; CHECK-PWR7-NEXT: lbz r28, 333(r1)
-; CHECK-PWR7-NEXT: lbz r22, 319(r1)
-; CHECK-PWR7-NEXT: sub r7, r7, r8
-; CHECK-PWR7-NEXT: add r30, r30, r19
-; CHECK-PWR7-NEXT: lbz r21, 335(r1)
+; CHECK-PWR7-NEXT: sub r11, r11, r12
; CHECK-PWR7-NEXT: lbz r27, 311(r1)
-; CHECK-PWR7-NEXT: sub r8, r11, r12
-; CHECK-PWR7-NEXT: xor r0, r0, r20
; CHECK-PWR7-NEXT: lbz r26, 327(r1)
+; CHECK-PWR7-NEXT: sub r0, r0, r30
; CHECK-PWR7-NEXT: lbz r25, 312(r1)
-; CHECK-PWR7-NEXT: sub r11, r29, r28
-; CHECK-PWR7-NEXT: srawi r28, r3, 31
; CHECK-PWR7-NEXT: lbz r24, 328(r1)
-; CHECK-PWR7-NEXT: sub r29, r22, r21
-; CHECK-PWR7-NEXT: add r3, r3, r28
-; CHECK-PWR7-NEXT: xor r30, r30, r19
+; CHECK-PWR7-NEXT: sub r29, r29, r28
+; CHECK-PWR7-NEXT: lbz r10, 315(r1)
+; CHECK-PWR7-NEXT: lbz r12, 331(r1)
; CHECK-PWR7-NEXT: sub r27, r27, r26
-; CHECK-PWR7-NEXT: srawi r17, r29, 31
+; CHECK-PWR7-NEXT: lbz r30, 316(r1)
+; CHECK-PWR7-NEXT: lbz r28, 332(r1)
+; CHECK-PWR7-NEXT: sub r25, r25, r24
+; CHECK-PWR7-NEXT: lbz r4, 320(r1)
+; CHECK-PWR7-NEXT: lbz r5, 305(r1)
+; CHECK-PWR7-NEXT: sub r10, r10, r12
+; CHECK-PWR7-NEXT: lbz r6, 321(r1)
+; CHECK-PWR7-NEXT: lbz r26, 317(r1)
+; CHECK-PWR7-NEXT: sub r30, r30, r28
+; CHECK-PWR7-NEXT: lbz r24, 333(r1)
+; CHECK-PWR7-NEXT: lbz r12, 319(r1)
+; CHECK-PWR7-NEXT: sub r3, r3, r4
+; CHECK-PWR7-NEXT: lbz r28, 335(r1)
+; CHECK-PWR7-NEXT: lbz r7, 306(r1)
+; CHECK-PWR7-NEXT: sub r5, r5, r6
+; CHECK-PWR7-NEXT: lbz r8, 322(r1)
+; CHECK-PWR7-NEXT: sub r26, r26, r24
+; CHECK-PWR7-NEXT: srawi r24, r5, 31
+; CHECK-PWR7-NEXT: lbz r23, 313(r1)
+; CHECK-PWR7-NEXT: sub r12, r12, r28
+; CHECK-PWR7-NEXT: srawi r28, r3, 31
+; CHECK-PWR7-NEXT: xor r5, r5, r24
+; CHECK-PWR7-NEXT: lbz r4, 329(r1)
+; CHECK-PWR7-NEXT: sub r7, r7, r8
; CHECK-PWR7-NEXT: xor r3, r3, r28
-; CHECK-PWR7-NEXT: ld r20, 368(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT: sub r26, r25, r24
-; CHECK-PWR7-NEXT: lbz r25, 318(r1)
-; CHECK-PWR7-NEXT: lbz r24, 334(r1)
-; CHECK-PWR7-NEXT: add r29, r29, r17
-; CHECK-PWR7-NEXT: xor r29, r29, r17
-; CHECK-PWR7-NEXT: srawi r18, r27, 31
-; CHECK-PWR7-NEXT: ld r19, 360(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT: sub r12, r25, r24
-; CHECK-PWR7-NEXT: stb r29, 288(r1)
-; CHECK-PWR7-NEXT: add r28, r27, r18
-; CHECK-PWR7-NEXT: srawi r29, r12, 31
-; CHECK-PWR7-NEXT: srawi r16, r26, 31
-; CHECK-PWR7-NEXT: xor r28, r28, r18
-; CHECK-PWR7-NEXT: ld r18, 352(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT: add r12, r12, r29
-; CHECK-PWR7-NEXT: add r27, r26, r16
-; CHECK-PWR7-NEXT: xor r12, r12, r29
-; CHECK-PWR7-NEXT: srawi r29, r7, 31
-; CHECK-PWR7-NEXT: xor r27, r27, r16
-; CHECK-PWR7-NEXT: ld r16, 336(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT: srawi r26, r8, 31
-; CHECK-PWR7-NEXT: srawi r25, r5, 31
-; CHECK-PWR7-NEXT: add r7, r7, r29
-; CHECK-PWR7-NEXT: add r8, r8, r26
-; CHECK-PWR7-NEXT: srawi r24, r6, 31
-; CHECK-PWR7-NEXT: add r5, r5, r25
-; CHECK-PWR7-NEXT: xor r7, r7, r29
-; CHECK-PWR7-NEXT: srawi r22, r9, 31
-; CHECK-PWR7-NEXT: srawi r21, r10, 31
-; CHECK-PWR7-NEXT: xor r8, r8, r26
-; CHECK-PWR7-NEXT: xor r5, r5, r25
-; CHECK-PWR7-NEXT: srawi r17, r11, 31
-; CHECK-PWR7-NEXT: srawi r26, r23, 31
-; CHECK-PWR7-NEXT: add r6, r6, r24
-; CHECK-PWR7-NEXT: add r9, r9, r22
-; CHECK-PWR7-NEXT: srawi r29, r4, 31
-; CHECK-PWR7-NEXT: add r10, r10, r21
-; CHECK-PWR7-NEXT: add r11, r11, r17
-; CHECK-PWR7-NEXT: add r25, r23, r26
-; CHECK-PWR7-NEXT: add r4, r4, r29
-; CHECK-PWR7-NEXT: xor r6, r6, r24
-; CHECK-PWR7-NEXT: xor r9, r9, r22
-; CHECK-PWR7-NEXT: xor r10, r10, r21
-; CHECK-PWR7-NEXT: xor r11, r11, r17
-; CHECK-PWR7-NEXT: xor r4, r4, r29
-; CHECK-PWR7-NEXT: xor r26, r25, r26
-; CHECK-PWR7-NEXT: addi r29, r1, 224
-; CHECK-PWR7-NEXT: stb r12, 272(r1)
+; CHECK-PWR7-NEXT: lbz r6, 314(r1)
+; CHECK-PWR7-NEXT: lbz r8, 330(r1)
+; CHECK-PWR7-NEXT: sub r3, r3, r28
+; CHECK-PWR7-NEXT: srawi r28, r7, 31
+; CHECK-PWR7-NEXT: sub r5, r5, r24
+; CHECK-PWR7-NEXT: srawi r24, r9, 31
+; CHECK-PWR7-NEXT: xor r7, r7, r28
+; CHECK-PWR7-NEXT: xor r9, r9, r24
+; CHECK-PWR7-NEXT: sub r7, r7, r28
+; CHECK-PWR7-NEXT: srawi r28, r11, 31
+; CHECK-PWR7-NEXT: sub r9, r9, r24
+; CHECK-PWR7-NEXT: srawi r24, r0, 31
+; CHECK-PWR7-NEXT: xor r11, r11, r28
+; CHECK-PWR7-NEXT: xor r0, r0, r24
+; CHECK-PWR7-NEXT: sub r11, r11, r28
+; CHECK-PWR7-NEXT: srawi r28, r29, 31
+; CHECK-PWR7-NEXT: sub r0, r0, r24
+; CHECK-PWR7-NEXT: srawi r24, r27, 31
+; CHECK-PWR7-NEXT: sub r4, r23, r4
+; CHECK-PWR7-NEXT: xor r29, r29, r28
+; CHECK-PWR7-NEXT: lbz r23, 318(r1)
+; CHECK-PWR7-NEXT: xor r27, r27, r24
+; CHECK-PWR7-NEXT: sub r29, r29, r28
+; CHECK-PWR7-NEXT: srawi r28, r25, 31
+; CHECK-PWR7-NEXT: sub r27, r27, r24
+; CHECK-PWR7-NEXT: srawi r24, r4, 31
+; CHECK-PWR7-NEXT: sub r6, r6, r8
+; CHECK-PWR7-NEXT: xor r25, r25, r28
+; CHECK-PWR7-NEXT: lbz r8, 334(r1)
+; CHECK-PWR7-NEXT: xor r4, r4, r24
+; CHECK-PWR7-NEXT: sub r28, r25, r28
+; CHECK-PWR7-NEXT: srawi r25, r6, 31
+; CHECK-PWR7-NEXT: sub r4, r4, r24
+; CHECK-PWR7-NEXT: srawi r24, r10, 31
+; CHECK-PWR7-NEXT: xor r6, r6, r25
+; CHECK-PWR7-NEXT: xor r10, r10, r24
+; CHECK-PWR7-NEXT: sub r6, r6, r25
+; CHECK-PWR7-NEXT: srawi r25, r30, 31
+; CHECK-PWR7-NEXT: sub r10, r10, r24
+; CHECK-PWR7-NEXT: srawi r24, r26, 31
+; CHECK-PWR7-NEXT: sub r8, r23, r8
+; CHECK-PWR7-NEXT: xor r30, r30, r25
+; CHECK-PWR7-NEXT: ld r23, 344(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT: xor r26, r26, r24
+; CHECK-PWR7-NEXT: sub r30, r30, r25
+; CHECK-PWR7-NEXT: srawi r25, r12, 31
+; CHECK-PWR7-NEXT: sub r26, r26, r24
+; CHECK-PWR7-NEXT: srawi r24, r8, 31
+; CHECK-PWR7-NEXT: xor r12, r12, r25
+; CHECK-PWR7-NEXT: xor r8, r8, r24
+; CHECK-PWR7-NEXT: sub r12, r12, r25
+; CHECK-PWR7-NEXT: addi r25, r1, 272
+; CHECK-PWR7-NEXT: sub r8, r8, r24
+; CHECK-PWR7-NEXT: stb r12, 288(r1)
; CHECK-PWR7-NEXT: addi r12, r1, 288
-; CHECK-PWR7-NEXT: addi r25, r1, 208
-; CHECK-PWR7-NEXT: stb r11, 256(r1)
-; CHECK-PWR7-NEXT: addi r11, r1, 272
-; CHECK-PWR7-NEXT: ld r24, 400(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT: stb r8, 240(r1)
-; CHECK-PWR7-NEXT: stb r7, 224(r1)
-; CHECK-PWR7-NEXT: stb r4, 208(r1)
-; CHECK-PWR7-NEXT: stb r26, 192(r1)
-; CHECK-PWR7-NEXT: stb r27, 176(r1)
-; CHECK-PWR7-NEXT: stb r28, 160(r1)
-; CHECK-PWR7-NEXT: stb r30, 144(r1)
+; CHECK-PWR7-NEXT: stb r8, 272(r1)
+; CHECK-PWR7-NEXT: stb r26, 256(r1)
+; CHECK-PWR7-NEXT: stb r30, 240(r1)
+; CHECK-PWR7-NEXT: stb r10, 224(r1)
+; CHECK-PWR7-NEXT: stb r6, 208(r1)
+; CHECK-PWR7-NEXT: stb r4, 192(r1)
+; CHECK-PWR7-NEXT: stb r28, 176(r1)
+; CHECK-PWR7-NEXT: stb r27, 160(r1)
+; CHECK-PWR7-NEXT: stb r29, 144(r1)
; CHECK-PWR7-NEXT: stb r0, 128(r1)
-; CHECK-PWR7-NEXT: stb r10, 112(r1)
+; CHECK-PWR7-NEXT: stb r11, 112(r1)
; CHECK-PWR7-NEXT: stb r9, 96(r1)
-; CHECK-PWR7-NEXT: stb r6, 80(r1)
+; CHECK-PWR7-NEXT: stb r7, 80(r1)
; CHECK-PWR7-NEXT: stb r5, 64(r1)
; CHECK-PWR7-NEXT: stb r3, 48(r1)
; CHECK-PWR7-NEXT: addi r8, r1, 256
-; CHECK-PWR7-NEXT: addi r7, r1, 240
+; CHECK-PWR7-NEXT: addi r26, r1, 240
; CHECK-PWR7-NEXT: lxvw4x v2, 0, r12
-; CHECK-PWR7-NEXT: lxvw4x v3, 0, r11
+; CHECK-PWR7-NEXT: lxvw4x v3, 0, r25
+; CHECK-PWR7-NEXT: addi r10, r1, 224
+; CHECK-PWR7-NEXT: addi r30, r1, 208
; CHECK-PWR7-NEXT: addi r3, r1, 192
; CHECK-PWR7-NEXT: addi r4, r1, 176
; CHECK-PWR7-NEXT: addi r5, r1, 160
; CHECK-PWR7-NEXT: addi r6, r1, 144
; CHECK-PWR7-NEXT: lxvw4x v4, 0, r8
-; CHECK-PWR7-NEXT: lxvw4x v5, 0, r7
-; CHECK-PWR7-NEXT: lxvw4x v0, 0, r29
-; CHECK-PWR7-NEXT: lxvw4x v1, 0, r25
+; CHECK-PWR7-NEXT: lxvw4x v5, 0, r26
; CHECK-PWR7-NEXT: addi r7, r1, 128
; CHECK-PWR7-NEXT: addi r8, r1, 112
-; CHECK-PWR7-NEXT: lxvw4x v6, 0, r3
-; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4
+; CHECK-PWR7-NEXT: lxvw4x v0, 0, r10
+; CHECK-PWR7-NEXT: lxvw4x v1, 0, r30
; CHECK-PWR7-NEXT: vmrghb v2, v3, v2
; CHECK-PWR7-NEXT: addi r9, r1, 96
-; CHECK-PWR7-NEXT: lxvw4x v3, 0, r5
-; CHECK-PWR7-NEXT: lxvw4x v8, 0, r6
+; CHECK-PWR7-NEXT: lxvw4x v6, 0, r3
+; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4
; CHECK-PWR7-NEXT: addi r3, r1, 80
; CHECK-PWR7-NEXT: addi r4, r1, 64
+; CHECK-PWR7-NEXT: lxvw4x v3, 0, r5
+; CHECK-PWR7-NEXT: lxvw4x v8, 0, r6
; CHECK-PWR7-NEXT: addi r5, r1, 48
; CHECK-PWR7-NEXT: vmrghb v4, v5, v4
; CHECK-PWR7-NEXT: lxvw4x v5, 0, r7
@@ -1063,29 +1045,26 @@ define <16 x i8> @sub_absv_8_ext(<16 x i8> %a, <16 x i8> %b) local_unnamed_addr
; CHECK-PWR7-NEXT: lxvw4x v10, 0, r3
; CHECK-PWR7-NEXT: vmrghb v6, v7, v6
; CHECK-PWR7-NEXT: lxvw4x v7, 0, r4
-; CHECK-PWR7-NEXT: ld r30, 448(r1) # 8-byte Folded Reload
; CHECK-PWR7-NEXT: vmrghb v3, v8, v3
; CHECK-PWR7-NEXT: lxvw4x v8, 0, r5
-; CHECK-PWR7-NEXT: ld r29, 440(r1) # 8-byte Folded Reload
; CHECK-PWR7-NEXT: vmrghb v5, v9, v5
-; CHECK-PWR7-NEXT: ld r28, 432(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT: ld r27, 424(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT: ld r30, 400(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT: ld r29, 392(r1) # 8-byte Folded Reload
; CHECK-PWR7-NEXT: vmrghb v1, v10, v1
-; CHECK-PWR7-NEXT: ld r26, 416(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT: ld r25, 408(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT: ld r28, 384(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT: ld r27, 376(r1) # 8-byte Folded Reload
; CHECK-PWR7-NEXT: vmrghb v7, v8, v7
-; CHECK-PWR7-NEXT: ld r23, 392(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT: ld r22, 384(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT: ld r26, 368(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT: ld r25, 360(r1) # 8-byte Folded Reload
; CHECK-PWR7-NEXT: vmrghh v2, v4, v2
-; CHECK-PWR7-NEXT: ld r21, 376(r1) # 8-byte Folded Reload
-; CHECK-PWR7-NEXT: ld r17, 344(r1) # 8-byte Folded Reload
+; CHECK-PWR7-NEXT: ld r24, 352(r1) # 8-byte Folded Reload
; CHECK-PWR7-NEXT: vmrghh v4, v6, v0
; CHECK-PWR7-NEXT: vmrghh v3, v5, v3
; CHECK-PWR7-NEXT: vmrghh v5, v7, v1
; CHECK-PWR7-NEXT: vmrghw v2, v4, v2
; CHECK-PWR7-NEXT: vmrghw v3, v5, v3
; CHECK-PWR7-NEXT: xxmrghd v2, v3, v2
-; CHECK-PWR7-NEXT: addi r1, r1, 464
+; CHECK-PWR7-NEXT: addi r1, r1, 416
; CHECK-PWR7-NEXT: blr
entry:
%vecext = extractelement <16 x i8> %a, i32 0
diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll
index 64c9e35146f63..29e481198246c 100644
--- a/llvm/test/CodeGen/RISCV/rv32zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll
@@ -734,8 +734,8 @@ define i32 @abs_i32(i32 %x) {
; RV32I-LABEL: abs_i32:
; RV32I: # %bb.0:
; RV32I-NEXT: srai a1, a0, 31
-; RV32I-NEXT: add a0, a0, a1
; RV32I-NEXT: xor a0, a0, a1
+; RV32I-NEXT: sub a0, a0, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: abs_i32:
diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll
index a5e3061f50953..0127ac4d33a5b 100644
--- a/llvm/test/CodeGen/RISCV/rv64zbb.ll
+++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll
@@ -947,8 +947,8 @@ define i32 @abs_i32(i32 %x) {
; RV64I: # %bb.0:
; RV64I-NEXT: sext.w a0, a0
; RV64I-NEXT: srai a1, a0, 63
-; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: abs_i32:
@@ -961,14 +961,13 @@ define i32 @abs_i32(i32 %x) {
ret i32 %abs
}
-; FIXME: We can remove the sext.w by using addw for RV64I and negw for RV64ZBB.
+; FIXME: We can remove the sext.w on RV64ZBB by using negw.
define signext i32 @abs_i32_sext(i32 signext %x) {
; RV64I-LABEL: abs_i32_sext:
; RV64I: # %bb.0:
; RV64I-NEXT: srai a1, a0, 63
-; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: xor a0, a0, a1
-; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: subw a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: abs_i32_sext:
@@ -987,8 +986,8 @@ define i64 @abs_i64(i64 %x) {
; RV64I-LABEL: abs_i64:
; RV64I: # %bb.0:
; RV64I-NEXT: srai a1, a0, 63
-; RV64I-NEXT: add a0, a0, a1
; RV64I-NEXT: xor a0, a0, a1
+; RV64I-NEXT: sub a0, a0, a1
; RV64I-NEXT: ret
;
; RV64ZBB-LABEL: abs_i64:
diff --git a/llvm/test/CodeGen/Thumb/iabs.ll b/llvm/test/CodeGen/Thumb/iabs.ll
index 2d51288b5242a..6bebea67e265a 100644
--- a/llvm/test/CodeGen/Thumb/iabs.ll
+++ b/llvm/test/CodeGen/Thumb/iabs.ll
@@ -6,8 +6,8 @@ define i8 @test_i8(i8 %a) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: sxtb r1, r0
; CHECK-NEXT: asrs r1, r1, #7
-; CHECK-NEXT: adds r0, r0, r1
; CHECK-NEXT: eors r0, r1
+; CHECK-NEXT: subs r0, r0, r1
; CHECK-NEXT: bx lr
%tmp1neg = sub i8 0, %a
%b = icmp sgt i8 %a, -1
@@ -20,8 +20,8 @@ define i16 @test_i16(i16 %a) nounwind {
; CHECK: @ %bb.0:
; CHECK-NEXT: sxth r1, r0
; CHECK-NEXT: asrs r1, r1, #15
-; CHECK-NEXT: adds r0, r0, r1
; CHECK-NEXT: eors r0, r1
+; CHECK-NEXT: subs r0, r0, r1
; CHECK-NEXT: bx lr
%tmp1neg = sub i16 0, %a
%b = icmp sgt i16 %a, -1
@@ -33,8 +33,8 @@ define i32 @test_i32(i32 %a) nounwind {
; CHECK-LABEL: test_i32:
; CHECK: @ %bb.0:
; CHECK-NEXT: asrs r1, r0, #31
-; CHECK-NEXT: adds r0, r0, r1
; CHECK-NEXT: eors r0, r1
+; CHECK-NEXT: subs r0, r0, r1
; CHECK-NEXT: bx lr
%tmp1neg = sub i32 0, %a
%b = icmp sgt i32 %a, -1
@@ -46,10 +46,10 @@ define i64 @test_i64(i64 %a) nounwind {
; CHECK-LABEL: test_i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: asrs r2, r1, #31
-; CHECK-NEXT: adds r0, r0, r2
-; CHECK-NEXT: adcs r1, r2
-; CHECK-NEXT: eors r0, r2
; CHECK-NEXT: eors r1, r2
+; CHECK-NEXT: eors r0, r2
+; CHECK-NEXT: subs r0, r0, r2
+; CHECK-NEXT: sbcs r1, r2
; CHECK-NEXT: bx lr
%tmp1neg = sub i64 0, %a
%b = icmp sgt i64 %a, -1
diff --git a/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll b/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll
index bd091cf2b6f84..152ac0fa3f168 100644
--- a/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll
+++ b/llvm/test/CodeGen/Thumb/optionaldef-scheduling.ll
@@ -1,7 +1,51 @@
-; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s
-; RUN: llc -mtriple=thumbv6-eabi %s -verify-machineinstrs -o - | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumb-eabi %s -verify-machineinstrs -o - | FileCheck %s --check-prefix=THUMB
+; RUN: llc -mtriple=thumbv6-eabi %s -verify-machineinstrs -o - | FileCheck %s --check-prefix=THUMBV6
+
+; The scheduler used to ignore OptionalDefs, and could unwittingly insert
+; a flag-setting instruction in between an ADDS and the corresponding ADC.
+
+; FIXME: The ABS lowering changed to XOR followed by SUB so this may no longer
+; be testing what it used to.
define i1 @test(i64 %arg) {
+; THUMB-LABEL: test:
+; THUMB: @ %bb.0: @ %entry
+; THUMB-NEXT: .save {r4, lr}
+; THUMB-NEXT: push {r4, lr}
+; THUMB-NEXT: asrs r2, r1, #31
+; THUMB-NEXT: movs r3, r1
+; THUMB-NEXT: eors r3, r2
+; THUMB-NEXT: movs r4, r0
+; THUMB-NEXT: eors r4, r2
+; THUMB-NEXT: subs r4, r4, r2
+; THUMB-NEXT: sbcs r3, r2
+; THUMB-NEXT: eors r3, r1
+; THUMB-NEXT: eors r0, r4
+; THUMB-NEXT: orrs r0, r3
+; THUMB-NEXT: rsbs r1, r0, #0
+; THUMB-NEXT: adcs r0, r1
+; THUMB-NEXT: pop {r4}
+; THUMB-NEXT: pop {r1}
+; THUMB-NEXT: bx r1
+;
+; THUMBV6-LABEL: test:
+; THUMBV6: @ %bb.0: @ %entry
+; THUMBV6-NEXT: .save {r4, lr}
+; THUMBV6-NEXT: push {r4, lr}
+; THUMBV6-NEXT: asrs r2, r1, #31
+; THUMBV6-NEXT: mov r3, r1
+; THUMBV6-NEXT: eors r3, r2
+; THUMBV6-NEXT: mov r4, r0
+; THUMBV6-NEXT: eors r4, r2
+; THUMBV6-NEXT: subs r4, r4, r2
+; THUMBV6-NEXT: sbcs r3, r2
+; THUMBV6-NEXT: eors r3, r1
+; THUMBV6-NEXT: eors r0, r4
+; THUMBV6-NEXT: orrs r0, r3
+; THUMBV6-NEXT: rsbs r1, r0, #0
+; THUMBV6-NEXT: adcs r0, r1
+; THUMBV6-NEXT: pop {r4, pc}
entry:
%ispos = icmp sgt i64 %arg, -1
%neg = sub i64 0, %arg
@@ -9,10 +53,3 @@ entry:
%cmp2 = icmp eq i64 %sel, %arg
ret i1 %cmp2
}
-
-; The scheduler used to ignore OptionalDefs, and could unwittingly insert
-; a flag-setting instruction in between an ADDS and the corresponding ADC.
-
-; CHECK: adds
-; CHECK-NOT: eors
-; CHECK: adcs
diff --git a/llvm/test/CodeGen/Thumb2/abs.ll b/llvm/test/CodeGen/Thumb2/abs.ll
index 02a2a14c2a5cc..88259ba758803 100644
--- a/llvm/test/CodeGen/Thumb2/abs.ll
+++ b/llvm/test/CodeGen/Thumb2/abs.ll
@@ -120,18 +120,18 @@ define i64 @abs64(i64 %x) {
; CHECKT1-LABEL: abs64:
; CHECKT1: @ %bb.0:
; CHECKT1-NEXT: asrs r2, r1, #31
-; CHECKT1-NEXT: adds r0, r0, r2
-; CHECKT1-NEXT: adcs r1, r2
-; CHECKT1-NEXT: eors r0, r2
; CHECKT1-NEXT: eors r1, r2
+; CHECKT1-NEXT: eors r0, r2
+; CHECKT1-NEXT: subs r0, r0, r2
+; CHECKT1-NEXT: sbcs r1, r2
; CHECKT1-NEXT: bx lr
;
; CHECKT2-LABEL: abs64:
; CHECKT2: @ %bb.0:
-; CHECKT2-NEXT: adds.w r0, r0, r1, asr #31
-; CHECKT2-NEXT: adc.w r2, r1, r1, asr #31
; CHECKT2-NEXT: eor.w r0, r0, r1, asr #31
-; CHECKT2-NEXT: eor.w r1, r2, r1, asr #31
+; CHECKT2-NEXT: eor.w r2, r1, r1, asr #31
+; CHECKT2-NEXT: subs.w r0, r0, r1, asr #31
+; CHECKT2-NEXT: sbc.w r1, r2, r1, asr #31
; CHECKT2-NEXT: bx lr
%abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true)
ret i64 %abs
@@ -141,8 +141,8 @@ define i32 @abs32(i32 %x) {
; CHECKT1-LABEL: abs32:
; CHECKT1: @ %bb.0:
; CHECKT1-NEXT: asrs r1, r0, #31
-; CHECKT1-NEXT: adds r0, r0, r1
; CHECKT1-NEXT: eors r0, r1
+; CHECKT1-NEXT: subs r0, r0, r1
; CHECKT1-NEXT: bx lr
;
; CHECKT2-LABEL: abs32:
@@ -160,15 +160,15 @@ define i16 @abs16(i16 %x) {
; CHECKT1: @ %bb.0:
; CHECKT1-NEXT: sxth r1, r0
; CHECKT1-NEXT: asrs r1, r1, #15
-; CHECKT1-NEXT: adds r0, r0, r1
; CHECKT1-NEXT: eors r0, r1
+; CHECKT1-NEXT: subs r0, r0, r1
; CHECKT1-NEXT: bx lr
;
; CHECKT2-LABEL: abs16:
; CHECKT2: @ %bb.0:
; CHECKT2-NEXT: sxth r1, r0
-; CHECKT2-NEXT: add.w r0, r0, r1, asr #15
; CHECKT2-NEXT: eor.w r0, r0, r1, asr #15
+; CHECKT2-NEXT: sub.w r0, r0, r1, asr #15
; CHECKT2-NEXT: bx lr
%abs = tail call i16 @llvm.abs.i16(i16 %x, i1 true)
ret i16 %abs
@@ -180,26 +180,26 @@ define i128 @abs128(i128 %x) {
; CHECKT1-NEXT: .save {r4, lr}
; CHECKT1-NEXT: push {r4, lr}
; CHECKT1-NEXT: asrs r4, r3, #31
-; CHECKT1-NEXT: adds r0, r0, r4
-; CHECKT1-NEXT: adcs r1, r4
-; CHECKT1-NEXT: adcs r2, r4
-; CHECKT1-NEXT: adcs r3, r4
-; CHECKT1-NEXT: eors r0, r4
-; CHECKT1-NEXT: eors r1, r4
-; CHECKT1-NEXT: eors r2, r4
; CHECKT1-NEXT: eors r3, r4
+; CHECKT1-NEXT: eors r2, r4
+; CHECKT1-NEXT: eors r1, r4
+; CHECKT1-NEXT: eors r0, r4
+; CHECKT1-NEXT: subs r0, r0, r4
+; CHECKT1-NEXT: sbcs r1, r4
+; CHECKT1-NEXT: sbcs r2, r4
+; CHECKT1-NEXT: sbcs r3, r4
; CHECKT1-NEXT: pop {r4, pc}
;
; CHECKT2-LABEL: abs128:
; CHECKT2: @ %bb.0:
-; CHECKT2-NEXT: adds.w r0, r0, r3, asr #31
-; CHECKT2-NEXT: adcs.w r1, r1, r3, asr #31
; CHECKT2-NEXT: eor.w r0, r0, r3, asr #31
-; CHECKT2-NEXT: adcs.w r2, r2, r3, asr #31
; CHECKT2-NEXT: eor.w r1, r1, r3, asr #31
-; CHECKT2-NEXT: adc.w r12, r3, r3, asr #31
+; CHECKT2-NEXT: subs.w r0, r0, r3, asr #31
; CHECKT2-NEXT: eor.w r2, r2, r3, asr #31
-; CHECKT2-NEXT: eor.w r3, r12, r3, asr #31
+; CHECKT2-NEXT: sbcs.w r1, r1, r3, asr #31
+; CHECKT2-NEXT: eor.w r12, r3, r3, asr #31
+; CHECKT2-NEXT: sbcs.w r2, r2, r3, asr #31
+; CHECKT2-NEXT: sbc.w r3, r12, r3, asr #31
; CHECKT2-NEXT: bx lr
%abs = tail call i128 @llvm.abs.i128(i128 %x, i1 true)
ret i128 %abs
diff --git a/llvm/test/CodeGen/WebAssembly/PR41149.ll b/llvm/test/CodeGen/WebAssembly/PR41149.ll
index 6a8dee1906f1b..0913bf0eba220 100644
--- a/llvm/test/CodeGen/WebAssembly/PR41149.ll
+++ b/llvm/test/CodeGen/WebAssembly/PR41149.ll
@@ -13,9 +13,9 @@ define void @mod() {
; CHECK-NEXT: i32.const 31
; CHECK-NEXT: i32.shr_s
; CHECK-NEXT: local.tee 0
-; CHECK-NEXT: i32.add
-; CHECK-NEXT: local.get 0
; CHECK-NEXT: i32.xor
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: i32.sub
; CHECK-NEXT: i32.store8 0
%tmp = load <4 x i8>, <4 x i8>* undef
%tmp2 = icmp slt <4 x i8> %tmp, zeroinitializer
diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll
index c03923aa47ff5..df83381ababd3 100644
--- a/llvm/test/CodeGen/X86/abs.ll
+++ b/llvm/test/CodeGen/X86/abs.ll
@@ -25,11 +25,11 @@ declare <16 x i8> @llvm.abs.v16i8(<16 x i8>, i1)
define i8 @test_i8(i8 %a) nounwind {
; X64-LABEL: test_i8:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %eax, %ecx
; X64-NEXT: sarb $7, %cl
-; X64-NEXT: leal (%rdi,%rcx), %eax
; X64-NEXT: xorb %cl, %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
;
@@ -38,8 +38,8 @@ define i8 @test_i8(i8 %a) nounwind {
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarb $7, %cl
-; X86-NEXT: addb %cl, %al
; X86-NEXT: xorb %cl, %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
%r = call i8 @llvm.abs.i8(i8 %a, i1 false)
ret i8 %r
@@ -197,8 +197,8 @@ define <2 x i32> @test_v2i32(<2 x i32> %a) nounwind {
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v2i32:
@@ -226,8 +226,8 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) nounwind {
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v3i32:
@@ -261,8 +261,8 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) nounwind {
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm1
; SSE-NEXT: psrad $31, %xmm1
-; SSE-NEXT: paddd %xmm1, %xmm0
; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: psubd %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: test_v4i32:
@@ -309,12 +309,12 @@ define <8 x i32> @test_v8i32(<8 x i32> %a) nounwind {
; SSE: # %bb.0:
; SSE-NEXT: movdqa %xmm0, %xmm2
; SSE-NEXT: psrad $31, %xmm2
-; SSE-NEXT: paddd %xmm2, %xmm0
; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: psubd %xmm2, %xmm0
; SSE-NEXT: movdqa %xmm1, %xmm2
; SSE-NEXT: psrad $31, %xmm2
-; SSE-NEXT: paddd %xmm2, %xmm1
; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: psubd %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: test_v8i32:
@@ -496,86 +496,86 @@ define <16 x i8> @test_v16i8(<16 x i8> %a) nounwind {
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movb %cl, %al
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %cl
; X86-NEXT: xorb %al, %cl
+; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb %dl, %al
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %dl
; X86-NEXT: xorb %al, %dl
+; X86-NEXT: subb %al, %dl
; X86-NEXT: movb %dl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb %ah, %al
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %ah
; X86-NEXT: xorb %al, %ah
+; X86-NEXT: subb %al, %ah
; X86-NEXT: movb %ah, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb %ch, %al
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %ch
; X86-NEXT: xorb %al, %ch
+; X86-NEXT: subb %al, %ch
; X86-NEXT: movb %ch, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb %dh, %al
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %dh
; X86-NEXT: xorb %al, %dh
+; X86-NEXT: subb %al, %dh
; X86-NEXT: movb %dh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %bl
; X86-NEXT: xorb %al, %bl
+; X86-NEXT: subb %al, %bl
; X86-NEXT: movb %bl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb %bh, %al
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %bh
; X86-NEXT: xorb %al, %bh
+; X86-NEXT: subb %al, %bh
; X86-NEXT: movb %bh, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %cl
; X86-NEXT: xorb %al, %cl
+; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %cl
; X86-NEXT: xorb %al, %cl
+; X86-NEXT: subb %al, %cl
; X86-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
; X86-NEXT: movb {{[0-9]+}}(%esp), %bh
; X86-NEXT: movb %bh, %al
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %bh
; X86-NEXT: xorb %al, %bh
+; X86-NEXT: subb %al, %bh
; X86-NEXT: movb {{[0-9]+}}(%esp), %bl
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %bl
; X86-NEXT: xorb %al, %bl
+; X86-NEXT: subb %al, %bl
; X86-NEXT: movb {{[0-9]+}}(%esp), %dh
; X86-NEXT: movb %dh, %al
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %dh
; X86-NEXT: xorb %al, %dh
+; X86-NEXT: subb %al, %dh
; X86-NEXT: movb {{[0-9]+}}(%esp), %ch
; X86-NEXT: movb %ch, %al
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %ch
; X86-NEXT: xorb %al, %ch
+; X86-NEXT: subb %al, %ch
; X86-NEXT: movb {{[0-9]+}}(%esp), %dl
; X86-NEXT: movl %edx, %eax
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %dl
; X86-NEXT: xorb %al, %dl
+; X86-NEXT: subb %al, %dl
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: sarb $7, %al
-; X86-NEXT: addb %al, %cl
; X86-NEXT: xorb %al, %cl
+; X86-NEXT: subb %al, %cl
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movb %al, %ah
; X86-NEXT: sarb $7, %ah
-; X86-NEXT: addb %ah, %al
; X86-NEXT: xorb %ah, %al
+; X86-NEXT: subb %ah, %al
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movb %al, 15(%esi)
; X86-NEXT: movb %cl, 14(%esi)
diff --git a/llvm/test/CodeGen/X86/combine-abs.ll b/llvm/test/CodeGen/X86/combine-abs.ll
index fd5930217cb06..de20b4dccb20a 100644
--- a/llvm/test/CodeGen/X86/combine-abs.ll
+++ b/llvm/test/CodeGen/X86/combine-abs.ll
@@ -110,13 +110,13 @@ define <4 x i64> @combine_v4i64_abs_abs(<4 x i64> %a) {
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT: paddq %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT: paddq %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: combine_v4i64_abs_abs:
diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll
index a00ec41516c83..1cbb8360440d9 100644
--- a/llvm/test/CodeGen/X86/iabs.ll
+++ b/llvm/test/CodeGen/X86/iabs.ll
@@ -15,17 +15,17 @@ define i8 @test_i8(i8 %a) nounwind {
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarb $7, %cl
-; X86-NEXT: addb %cl, %al
; X86-NEXT: xorb %cl, %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: test_i8:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %eax, %ecx
; X64-NEXT: sarb $7, %cl
-; X64-NEXT: leal (%rdi,%rcx), %eax
; X64-NEXT: xorb %cl, %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%tmp1neg = sub i8 0, %a
@@ -40,8 +40,8 @@ define i16 @test_i16(i16 %a) nounwind {
; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NO-CMOV-NEXT: movl %eax, %ecx
; X86-NO-CMOV-NEXT: sarl $15, %ecx
-; X86-NO-CMOV-NEXT: addl %ecx, %eax
; X86-NO-CMOV-NEXT: xorl %ecx, %eax
+; X86-NO-CMOV-NEXT: subl %ecx, %eax
; X86-NO-CMOV-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NO-CMOV-NEXT: retl
;
@@ -71,8 +71,8 @@ define i32 @test_i32(i32 %a) nounwind {
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-CMOV-NEXT: movl %eax, %ecx
; X86-NO-CMOV-NEXT: sarl $31, %ecx
-; X86-NO-CMOV-NEXT: addl %ecx, %eax
; X86-NO-CMOV-NEXT: xorl %ecx, %eax
+; X86-NO-CMOV-NEXT: subl %ecx, %eax
; X86-NO-CMOV-NEXT: retl
;
; X86-CMOV-LABEL: test_i32:
diff --git a/llvm/test/CodeGen/X86/neg-abs.ll b/llvm/test/CodeGen/X86/neg-abs.ll
index f34f683db078e..ee2564660a066 100644
--- a/llvm/test/CodeGen/X86/neg-abs.ll
+++ b/llvm/test/CodeGen/X86/neg-abs.ll
@@ -154,24 +154,21 @@ define i128 @neg_abs_i128(i128 %x) nounwind {
define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind {
; X86-LABEL: sub_abs_i8:
; X86: # %bb.0:
-; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarb $7, %dl
-; X86-NEXT: addb %dl, %cl
-; X86-NEXT: xorb %dl, %cl
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarb $7, %al
+; X86-NEXT: xorb %al, %cl
; X86-NEXT: subb %cl, %al
+; X86-NEXT: addb {{[0-9]+}}(%esp), %al
; X86-NEXT: retl
;
; X64-LABEL: sub_abs_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: movl %edi, %ecx
-; X64-NEXT: sarb $7, %cl
-; X64-NEXT: addb %cl, %dil
-; X64-NEXT: xorb %cl, %dil
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: sarb $7, %al
+; X64-NEXT: xorb %al, %dil
; X64-NEXT: subb %dil, %al
-; X64-NEXT: # kill: def $al killed $al killed $eax
+; X64-NEXT: addb %sil, %al
; X64-NEXT: retq
%abs = tail call i8 @llvm.abs.i8(i8 %x, i1 false)
%neg = sub nsw i8 %y, %abs
@@ -181,13 +178,12 @@ define i8 @sub_abs_i8(i8 %x, i8 %y) nounwind {
define i16 @sub_abs_i16(i16 %x, i16 %y) nounwind {
; X86-LABEL: sub_abs_i16:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $15, %edx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $15, %eax
+; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
@@ -207,13 +203,12 @@ define i16 @sub_abs_i16(i16 %x, i16 %y) nounwind {
define i32 @sub_abs_i32(i32 %x, i32 %y) nounwind {
; X86-LABEL: sub_abs_i32:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: sarl $31, %edx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: xorl %edx, %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: sarl $31, %eax
+; X86-NEXT: xorl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: retl
;
; X64-LABEL: sub_abs_i32:
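
[Editor's sketch, not part of the patch.] The sub_abs_* diffs above come out one instruction shorter because the negated form falls out of the same identity: -abs(x) = s - (x ^ s), so y - abs(x) = y + (s - (x ^ s)) and the final add can take y straight from its original location instead of needing abs(x) materialized in a spare register first. A sketch of the i32 shape the new X86 output encodes:

#include <cassert>
#include <cstdint>

static int32_t sub_abs(int32_t y, int32_t x) {
  int32_t s = x >> 31;         // sarl $31: sign mask, 0 or -1
  int32_t nabs = s - (x ^ s);  // xorl + subl: -abs(x)
  return y + nabs;             // addl: y - abs(x)
}

int main() {
  assert(sub_abs(10, -3) == 7);
  assert(sub_abs(10, 3) == 7);
  return 0;
}
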
diff --git a/llvm/test/CodeGen/X86/viabs.ll b/llvm/test/CodeGen/X86/viabs.ll
index 405d9eaa2c834..d892297d81ea6 100644
--- a/llvm/test/CodeGen/X86/viabs.ll
+++ b/llvm/test/CodeGen/X86/viabs.ll
@@ -12,8 +12,8 @@ define <4 x i32> @test_abs_gt_v4i32(<4 x i32> %a) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
-; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_gt_v4i32:
@@ -51,8 +51,8 @@ define <4 x i32> @test_abs_ge_v4i32(<4 x i32> %a) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
-; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_ge_v4i32:
@@ -176,8 +176,8 @@ define <4 x i32> @test_abs_le_v4i32(<4 x i32> %a) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
-; SSE2-NEXT: paddd %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_v4i32:
@@ -215,12 +215,12 @@ define <8 x i32> @test_abs_gt_v8i32(<8 x i32> %a) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: paddd %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubd %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_gt_v8i32:
@@ -263,12 +263,12 @@ define <8 x i32> @test_abs_ge_v8i32(<8 x i32> %a) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: paddd %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubd %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_ge_v8i32:
@@ -413,12 +413,12 @@ define <8 x i32> @test_abs_le_v8i32(<8 x i32> %a) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: paddd %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubd %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
-; SSE2-NEXT: paddd %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubd %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_v8i32:
@@ -461,20 +461,20 @@ define <16 x i32> @test_abs_le_16i32(<16 x i32> %a) nounwind {
; SSE2: # %bb.0:
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
-; SSE2-NEXT: paddd %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: psubd %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
-; SSE2-NEXT: paddd %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: psubd %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
-; SSE2-NEXT: paddd %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: psubd %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
-; SSE2-NEXT: paddd %xmm4, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: psubd %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_16i32:
@@ -527,8 +527,8 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSE2-NEXT: paddq %xmm1, %xmm0
; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: psubq %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_ge_v2i64:
@@ -536,8 +536,8 @@ define <2 x i64> @test_abs_ge_v2i64(<2 x i64> %a) nounwind {
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm1, %xmm0
; SSSE3-NEXT: pxor %xmm1, %xmm0
+; SSSE3-NEXT: psubq %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_ge_v2i64:
@@ -577,13 +577,13 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT: paddq %xmm2, %xmm0
; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: psubq %xmm2, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSE2-NEXT: paddq %xmm2, %xmm1
; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: psubq %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_gt_v4i64:
@@ -591,13 +591,13 @@ define <4 x i64> @test_abs_gt_v4i64(<4 x i64> %a) nounwind {
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm2, %xmm0
; SSSE3-NEXT: pxor %xmm2, %xmm0
+; SSSE3-NEXT: psubq %xmm2, %xmm0
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm2, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm1
+; SSSE3-NEXT: psubq %xmm2, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_gt_v4i64:
@@ -646,23 +646,23 @@ define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT: paddq %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: psubq %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT: paddq %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: psubq %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT: paddq %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: psubq %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT: paddq %xmm4, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: psubq %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_v8i64:
@@ -670,23 +670,23 @@ define <8 x i64> @test_abs_le_v8i64(<8 x i64> %a) nounwind {
; SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm4, %xmm0
; SSSE3-NEXT: pxor %xmm4, %xmm0
+; SSSE3-NEXT: psubq %xmm4, %xmm0
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm4, %xmm1
; SSSE3-NEXT: pxor %xmm4, %xmm1
+; SSSE3-NEXT: psubq %xmm4, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm4, %xmm2
; SSSE3-NEXT: pxor %xmm4, %xmm2
+; SSSE3-NEXT: psubq %xmm4, %xmm2
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm4, %xmm3
; SSSE3-NEXT: pxor %xmm4, %xmm3
+; SSSE3-NEXT: psubq %xmm4, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_le_v8i64:
@@ -754,23 +754,23 @@ define <8 x i64> @test_abs_le_v8i64_fold(<8 x i64>* %a.ptr) nounwind {
; SSE2-NEXT: movdqa %xmm0, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT: paddq %xmm4, %xmm0
; SSE2-NEXT: pxor %xmm4, %xmm0
+; SSE2-NEXT: psubq %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm1, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT: paddq %xmm4, %xmm1
; SSE2-NEXT: pxor %xmm4, %xmm1
+; SSE2-NEXT: psubq %xmm4, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT: paddq %xmm4, %xmm2
; SSE2-NEXT: pxor %xmm4, %xmm2
+; SSE2-NEXT: psubq %xmm4, %xmm2
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSE2-NEXT: paddq %xmm4, %xmm3
; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: psubq %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test_abs_le_v8i64_fold:
@@ -782,23 +782,23 @@ define <8 x i64> @test_abs_le_v8i64_fold(<8 x i64>* %a.ptr) nounwind {
; SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm4, %xmm0
; SSSE3-NEXT: pxor %xmm4, %xmm0
+; SSSE3-NEXT: psubq %xmm4, %xmm0
; SSSE3-NEXT: movdqa %xmm1, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm4, %xmm1
; SSSE3-NEXT: pxor %xmm4, %xmm1
+; SSSE3-NEXT: psubq %xmm4, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm4, %xmm2
; SSSE3-NEXT: pxor %xmm4, %xmm2
+; SSSE3-NEXT: psubq %xmm4, %xmm2
; SSSE3-NEXT: movdqa %xmm3, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: pshufd {{.*#+}} xmm4 = xmm4[1,1,3,3]
-; SSSE3-NEXT: paddq %xmm4, %xmm3
; SSSE3-NEXT: pxor %xmm4, %xmm3
+; SSSE3-NEXT: psubq %xmm4, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test_abs_le_v8i64_fold:
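
[Editor's sketch, not part of the patch.] The SSE2/SSSE3 vector churn follows the same identity; the only wrinkle is that SSE2 has no 64-bit arithmetic shift, so the psrad/pshufd pair in the checks above first splats each element's i32 sign across its full 64-bit lane. A minimal intrinsics sketch of the v2i64 case:

#include <cassert>
#include <cstdint>
#include <emmintrin.h>  // SSE2

static __m128i abs_v2i64(__m128i x) {
  __m128i hi = _mm_srai_epi32(x, 31);  // psrad $31: sign of each i32
  // pshufd [1,1,3,3]: copy each lane's high dword over its low dword,
  // turning the i32 signs into full 64-bit sign masks (0 or -1).
  __m128i s = _mm_shuffle_epi32(hi, _MM_SHUFFLE(3, 3, 1, 1));
  return _mm_sub_epi64(_mm_xor_si128(x, s), s);  // pxor; psubq
}

int main() {
  int64_t in[2] = {-5, 7}, out[2];
  __m128i v = abs_v2i64(_mm_loadu_si128((const __m128i *)in));
  _mm_storeu_si128((__m128i *)out, v);
  assert(out[0] == 5 && out[1] == 7);
  return 0;
}
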
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected
index 78abaf5168068..9ae01c167b8da 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/basic.ll.expected
@@ -9,17 +9,17 @@ define i8 @test_i8(i8 %a) nounwind {
; X86-NEXT: movb {{[0-9]+}}(%esp), %al
; X86-NEXT: movl %eax, %ecx
; X86-NEXT: sarb $7, %cl
-; X86-NEXT: addb %cl, %al
; X86-NEXT: xorb %cl, %al
+; X86-NEXT: subb %cl, %al
; X86-NEXT: retl
;
; X64-LABEL: test_i8:
; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: movl %edi, %ecx
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movl %eax, %ecx
; X64-NEXT: sarb $7, %cl
-; X64-NEXT: leal (%rdi,%rcx), %eax
; X64-NEXT: xorb %cl, %al
+; X64-NEXT: subb %cl, %al
; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%tmp1neg = sub i8 0, %a
@@ -34,8 +34,8 @@ define i16 @test_i16(i16 %a) nounwind {
; X86-NO-CMOV-NEXT: movswl {{[0-9]+}}(%esp), %eax
; X86-NO-CMOV-NEXT: movl %eax, %ecx
; X86-NO-CMOV-NEXT: sarl $15, %ecx
-; X86-NO-CMOV-NEXT: addl %ecx, %eax
; X86-NO-CMOV-NEXT: xorl %ecx, %eax
+; X86-NO-CMOV-NEXT: subl %ecx, %eax
; X86-NO-CMOV-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NO-CMOV-NEXT: retl
;
@@ -65,8 +65,8 @@ define i32 @test_i32(i32 %a) nounwind {
; X86-NO-CMOV-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NO-CMOV-NEXT: movl %eax, %ecx
; X86-NO-CMOV-NEXT: sarl $31, %ecx
-; X86-NO-CMOV-NEXT: addl %ecx, %eax
; X86-NO-CMOV-NEXT: xorl %ecx, %eax
+; X86-NO-CMOV-NEXT: subl %ecx, %eax
; X86-NO-CMOV-NEXT: retl
;
; X86-CMOV-LABEL: test_i32: