[llvm] Remove the uaddo-only specification (PR #160392)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 25 06:21:40 PDT 2025
https://github.com/AZero13 updated https://github.com/llvm/llvm-project/pull/160392
>From 86dc85139e746a2a53f9d466ff374b3ffe3d47bf Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 23 Sep 2025 14:27:07 -0400
Subject: [PATCH 01/12] [CodeGenPrepare] Bail out of usubo creation if the
 sub's parent is not the same as the comparison's
This matches uaddo's existing behavior.
Codegen comparison: https://godbolt.org/z/x8j4EhGno
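
For illustration only (not taken from the patch or its tests), a minimal IR sketch of the shape the new bail-out rejects: the icmp lives in one block while the sub lives in another block and has more than one use, so forming usub.with.overflow would require moving uses of the condition value across blocks. Function and value names below are hypothetical.

  define i64 @usub_cross_block(i64 %x, i64 %y, ptr %p) {
  entry:
    %ov = icmp ult i64 %x, %y          ; overflow check in the entry block
    br i1 %ov, label %fallback, label %math
  math:
    %sub = sub i64 %x, %y              ; math result in a different block...
    store i64 %sub, ptr %p             ; ...with more than one use
    br label %fallback
  fallback:
    %r = phi i64 [ 0, %entry ], [ %sub, %math ]
    ret i64 %r
  }

With the added check, CodeGenPrepare would leave this pattern alone rather than create the intrinsic call in the icmp's block.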
---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 6 ++++++
llvm/test/CodeGen/X86/usub_inc_iv.ll | 10 +++++-----
2 files changed, 11 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index d290f202f3cca..eb73d01b3558c 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1749,6 +1749,12 @@ bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
Sub->hasNUsesOrMore(1)))
return false;
+ // We don't want to move around uses of condition values this late, so we
+ // check if it is legal to create the call to the intrinsic in the basic
+ // block containing the icmp.
+ if (Sub->getParent() != Cmp->getParent() && !Sub->hasOneUse())
+ return false;
+
if (!replaceMathCmpWithIntrinsic(Sub, Sub->getOperand(0), Sub->getOperand(1),
Cmp, Intrinsic::usub_with_overflow))
return false;
diff --git a/llvm/test/CodeGen/X86/usub_inc_iv.ll b/llvm/test/CodeGen/X86/usub_inc_iv.ll
index 88bfddb51f2d4..ff06aaabd1b0c 100644
--- a/llvm/test/CodeGen/X86/usub_inc_iv.ll
+++ b/llvm/test/CodeGen/X86/usub_inc_iv.ll
@@ -303,14 +303,14 @@ define i32 @test_06(ptr %p, i64 %len, i32 %x) {
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[MATH:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[IV]], i64 1)
-; CHECK-NEXT: [[MATH]] = extractvalue { i64, i1 } [[TMP0]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
+; CHECK-NEXT: [[OV:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT: br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
-; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[MATH]], 4
+; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, ptr [[P:%.*]], i64 [[SUNKADDR]]
-; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, ptr [[SUNKADDR1]] unordered, align 4
+; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, ptr [[SUNKADDR1]], i64 -4
+; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, ptr [[SUNKADDR2]] unordered, align 4
+; CHECK-NEXT: [[MATH]] = add i64 [[IV]], -1
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
; CHECK: exit:
>From d4382cc5e8d13ada17ffec8ec4a02d665949089e Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 23 Sep 2025 16:05:38 -0400
Subject: [PATCH 02/12] usubo
---
llvm/include/llvm/CodeGen/TargetLowering.h | 9 +--------
llvm/lib/Target/X86/X86ISelLowering.cpp | 6 ++----
2 files changed, 3 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 75c073bb3871c..590f61f08a9f5 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3455,19 +3455,12 @@ class LLVM_ABI TargetLoweringBase {
/// matching of other patterns.
virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool MathUsed) const {
- // TODO: The default logic is inherited from code in CodeGenPrepare.
- // The opcode should not make a difference by default?
- if (Opcode != ISD::UADDO)
- return false;
-
// Allow the transform as long as we have an integer type that is not
// obviously illegal and unsupported and if the math result is used
// besides the overflow check. On some targets (e.g. SPARC), it is
// not profitable to form on overflow op if the math result has no
// concrete users.
- if (VT.isVector())
- return false;
- return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
+ return MathUsed && isOperationLegalOrCustomOrPromote(Opcode, VT);
}
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1c8de3a8df6e2..a237635b4541a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3405,10 +3405,8 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool) const {
- // TODO: Allow vectors?
- if (VT.isVector())
- return false;
- return VT.isSimple() || !isOperationExpand(Opcode, VT);
+
+ return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
}
bool X86TargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
>From 2cde048101b0b96613c27e94c0369a0db0982bc0 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Tue, 23 Sep 2025 18:11:11 -0400
Subject: [PATCH 03/12] r
---
llvm/include/llvm/CodeGen/TargetLowering.h | 2 +-
llvm/test/CodeGen/AArch64/abdu-neg.ll | 2 +-
llvm/test/CodeGen/AArch64/arm64-srl-and.ll | 9 +-
llvm/test/CodeGen/AArch64/cgp-usubo.ll | 8 +-
llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll | 12 +-
.../AArch64/local-bounds-single-trap.ll | 16 +-
llvm/test/CodeGen/AArch64/sat-add.ll | 44 +-
.../AArch64/signed-truncation-check.ll | 6 +-
.../CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll | 8 +-
llvm/test/CodeGen/ARM/select_const.ll | 47 +-
llvm/test/CodeGen/Hexagon/loop-balign.ll | 104 +++-
llvm/test/CodeGen/NVPTX/i128.ll | 72 +--
.../PowerPC/atomicrmw-uinc-udec-wrap.ll | 46 +-
llvm/test/CodeGen/PowerPC/sat-add.ll | 38 +-
llvm/test/CodeGen/RISCV/branch-on-zero.ll | 28 +-
.../test/CodeGen/RISCV/overflow-intrinsics.ll | 84 +--
.../SPIRV/optimizations/add-check-overflow.ll | 4 +
.../CodeGen/Thumb/scheduler-clone-cpsr-def.ll | 33 +-
llvm/test/CodeGen/X86/abdu-neg.ll | 93 ++--
.../X86/div-rem-pair-recomposition-signed.ll | 520 +++++++++---------
.../div-rem-pair-recomposition-unsigned.ll | 432 ++++++++-------
llvm/test/CodeGen/X86/select.ll | 12 +-
.../AArch64/overflow-intrinsics.ll | 126 ++++-
.../SPARC/overflow-intrinsics.ll | 95 +++-
24 files changed, 1058 insertions(+), 783 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 590f61f08a9f5..c874fc9a36e1c 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3460,7 +3460,7 @@ class LLVM_ABI TargetLoweringBase {
// besides the overflow check. On some targets (e.g. SPARC), it is
// not profitable to form on overflow op if the math result has no
// concrete users.
- return MathUsed && isOperationLegalOrCustomOrPromote(Opcode, VT);
+ return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
}
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
diff --git a/llvm/test/CodeGen/AArch64/abdu-neg.ll b/llvm/test/CodeGen/AArch64/abdu-neg.ll
index 269cbf03f32a0..606162ade272b 100644
--- a/llvm/test/CodeGen/AArch64/abdu-neg.ll
+++ b/llvm/test/CodeGen/AArch64/abdu-neg.ll
@@ -355,7 +355,7 @@ define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: abd_cmp_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: subs x8, x0, x1
-; CHECK-NEXT: cneg x0, x8, hs
+; CHECK-NEXT: cneg x0, x8, hi
; CHECK-NEXT: ret
%cmp = icmp ult i64 %a, %b
%ab = sub i64 %a, %b
diff --git a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
index b58f6ba96a5b8..3f4d6f722fdb6 100644
--- a/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-srl-and.ll
@@ -9,13 +9,12 @@ define i32 @srl_and() {
; CHECK-LABEL: srl_and:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: adrp x8, :got:g
-; CHECK-NEXT: mov w9, #50
; CHECK-NEXT: ldr x8, [x8, :got_lo12:g]
; CHECK-NEXT: ldrh w8, [x8]
-; CHECK-NEXT: eor w8, w8, w9
-; CHECK-NEXT: mov w9, #65535
-; CHECK-NEXT: add w8, w8, w9
-; CHECK-NEXT: and w0, w8, w8, lsr #16
+; CHECK-NEXT: cmp w8, #50
+; CHECK-NEXT: sub w8, w8, #1
+; CHECK-NEXT: cset w9, ne
+; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
entry:
%0 = load i16, ptr @g, align 4
diff --git a/llvm/test/CodeGen/AArch64/cgp-usubo.ll b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
index d307107fc07ee..e49e8e86561c7 100644
--- a/llvm/test/CodeGen/AArch64/cgp-usubo.ll
+++ b/llvm/test/CodeGen/AArch64/cgp-usubo.ll
@@ -108,11 +108,9 @@ define i1 @usubo_ugt_constant_op1_i8(i8 %x, ptr %p) nounwind {
define i1 @usubo_eq_constant1_op1_i32(i32 %x, ptr %p) nounwind {
; CHECK-LABEL: usubo_eq_constant1_op1_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: sub w9, w0, #1
-; CHECK-NEXT: cset w8, eq
-; CHECK-NEXT: str w9, [x1]
-; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: subs w8, w0, #1
+; CHECK-NEXT: cset w0, lo
+; CHECK-NEXT: str w8, [x1]
; CHECK-NEXT: ret
%s = add i32 %x, -1
%ov = icmp eq i32 %x, 0
diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
index 3f4dd116d91f8..7917be5728591 100644
--- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
+++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll
@@ -192,12 +192,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
; CHECK-NEXT: mov w22, #2 ; =0x2
; CHECK-NEXT: LBB3_5: ; %for.cond
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: cbz w22, LBB3_8
+; CHECK-NEXT: subs w22, w22, #1
+; CHECK-NEXT: b.lo LBB3_8
; CHECK-NEXT: ; %bb.6: ; %for.body
; CHECK-NEXT: ; in Loop: Header=BB3_5 Depth=1
-; CHECK-NEXT: sub w22, w22, #1
-; CHECK-NEXT: orr w9, w21, w20
; CHECK-NEXT: ldr w10, [x19, w22, sxtw #2]
+; CHECK-NEXT: orr w9, w21, w20
; CHECK-NEXT: cmp w9, w10
; CHECK-NEXT: b.eq LBB3_5
; CHECK-NEXT: ; %bb.7: ; %if.then
@@ -238,12 +238,12 @@ define i1 @test_conditional2(i32 %a, i32 %b, ptr %c) {
; OUTLINE-ATOMICS-NEXT: cset w8, eq
; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %for.cond
; OUTLINE-ATOMICS-NEXT: ; =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: cbz w22, LBB3_4
+; OUTLINE-ATOMICS-NEXT: subs w22, w22, #1
+; OUTLINE-ATOMICS-NEXT: b.lo LBB3_4
; OUTLINE-ATOMICS-NEXT: ; %bb.2: ; %for.body
; OUTLINE-ATOMICS-NEXT: ; in Loop: Header=BB3_1 Depth=1
-; OUTLINE-ATOMICS-NEXT: sub w22, w22, #1
-; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20
; OUTLINE-ATOMICS-NEXT: ldr w10, [x19, w22, sxtw #2]
+; OUTLINE-ATOMICS-NEXT: orr w9, w21, w20
; OUTLINE-ATOMICS-NEXT: cmp w9, w10
; OUTLINE-ATOMICS-NEXT: b.eq LBB3_1
; OUTLINE-ATOMICS-NEXT: ; %bb.3: ; %if.then
diff --git a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
index 1207eaa2612a3..f2c84006910c5 100644
--- a/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
+++ b/llvm/test/CodeGen/AArch64/local-bounds-single-trap.ll
@@ -17,24 +17,22 @@ define dso_local void @f8(i32 noundef %i, i32 noundef %k) #0 {
; CHECK-ASM-NEXT: .cfi_remember_state
; CHECK-ASM-NEXT: // kill: def $w0 killed $w0 def $x0
; CHECK-ASM-NEXT: sxtw x8, w0
+; CHECK-ASM-NEXT: mov w9, #10 // =0xa
; CHECK-ASM-NEXT: stp w1, w0, [sp, #8]
-; CHECK-ASM-NEXT: cmp x8, #10
-; CHECK-ASM-NEXT: b.hi .LBB0_5
+; CHECK-ASM-NEXT: subs x9, x9, x8
+; CHECK-ASM-NEXT: b.lo .LBB0_5
; CHECK-ASM-NEXT: // %bb.1: // %entry
-; CHECK-ASM-NEXT: mov w9, #10 // =0xa
-; CHECK-ASM-NEXT: sub x9, x9, x8
; CHECK-ASM-NEXT: cbz x9, .LBB0_5
; CHECK-ASM-NEXT: // %bb.2:
; CHECK-ASM-NEXT: ldrsw x9, [sp, #8]
+; CHECK-ASM-NEXT: mov w10, #10 // =0xa
+; CHECK-ASM-NEXT: subs x11, x10, x9
; CHECK-ASM-NEXT: adrp x10, .L_MergedGlobals
; CHECK-ASM-NEXT: add x10, x10, :lo12:.L_MergedGlobals
; CHECK-ASM-NEXT: strb wzr, [x10, x8]
-; CHECK-ASM-NEXT: cmp x9, #10
-; CHECK-ASM-NEXT: b.hi .LBB0_6
+; CHECK-ASM-NEXT: b.lo .LBB0_6
; CHECK-ASM-NEXT: // %bb.3:
-; CHECK-ASM-NEXT: mov w8, #10 // =0xa
-; CHECK-ASM-NEXT: sub x8, x8, x9
-; CHECK-ASM-NEXT: cbz x8, .LBB0_6
+; CHECK-ASM-NEXT: cbz x11, .LBB0_6
; CHECK-ASM-NEXT: // %bb.4:
; CHECK-ASM-NEXT: add x8, x10, x9
; CHECK-ASM-NEXT: strb wzr, [x8, #10]
diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll
index ecd48d6b7c65b..12044ebe20fa1 100644
--- a/llvm/test/CodeGen/AArch64/sat-add.ll
+++ b/llvm/test/CodeGen/AArch64/sat-add.ll
@@ -25,9 +25,9 @@ define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: add w8, w8, #42
-; CHECK-NEXT: tst w8, #0x100
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, #42
+; CHECK-NEXT: cmp w8, w9, uxtb
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i8 %x, 42
%c = icmp ugt i8 %x, %a
@@ -68,9 +68,9 @@ define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, #42
-; CHECK-NEXT: tst w8, #0x10000
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, #42
+; CHECK-NEXT: cmp w8, w9, uxth
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i16 %x, 42
%c = icmp ugt i16 %x, %a
@@ -188,9 +188,9 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: add w8, w8, w1, uxtb
-; CHECK-NEXT: tst w8, #0x100
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxtb
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i8 %x, %y
%c = icmp ugt i8 %x, %a
@@ -201,11 +201,11 @@ define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
define i8 @unsigned_sat_variable_i8_using_cmp_notval(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_notval:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xff
-; CHECK-NEXT: add w9, w0, w1
-; CHECK-NEXT: add w8, w8, w0, uxtb
-; CHECK-NEXT: tst w8, #0x100
-; CHECK-NEXT: csinv w0, w9, wzr, eq
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: add w10, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxtb
+; CHECK-NEXT: csinv w0, w10, wzr, ls
; CHECK-NEXT: ret
%noty = xor i8 %y, -1
%a = add i8 %x, %y
@@ -234,9 +234,9 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, w1, uxth
-; CHECK-NEXT: tst w8, #0x10000
-; CHECK-NEXT: csinv w0, w8, wzr, eq
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxth
+; CHECK-NEXT: csinv w0, w9, wzr, ls
; CHECK-NEXT: ret
%a = add i16 %x, %y
%c = icmp ugt i16 %x, %a
@@ -247,11 +247,11 @@ define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
define i16 @unsigned_sat_variable_i16_using_cmp_notval(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_notval:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: add w9, w0, w1
-; CHECK-NEXT: add w8, w8, w0, uxth
-; CHECK-NEXT: tst w8, #0x10000
-; CHECK-NEXT: csinv w0, w9, wzr, eq
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: add w10, w0, w1
+; CHECK-NEXT: cmp w8, w9, uxth
+; CHECK-NEXT: csinv w0, w10, wzr, ls
; CHECK-NEXT: ret
%noty = xor i16 %y, -1
%a = add i16 %x, %y
diff --git a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
index 7c80f9320faec..0720a7f72bd8c 100644
--- a/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
+++ b/llvm/test/CodeGen/AArch64/signed-truncation-check.ll
@@ -313,9 +313,9 @@ define i1 @add_ultcmp_bad_i16_i8_cmp(i16 %x, i16 %y) nounwind {
define i1 @add_ultcmp_bad_i8_i16(i16 %x) nounwind {
; CHECK-LABEL: add_ultcmp_bad_i8_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: add w8, w8, #128
-; CHECK-NEXT: lsr w0, w8, #16
+; CHECK-NEXT: add w8, w0, #128
+; CHECK-NEXT: tst w8, #0xff80
+; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%tmp0 = add i16 %x, 128 ; 1U << (8-1)
%tmp1 = icmp ult i16 %tmp0, 128 ; 1U << (8-1)
diff --git a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
index 433fb325a7349..c37afeeea375d 100644
--- a/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/ARM/atomicrmw-uinc-udec-wrap.ll
@@ -147,11 +147,11 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: .LBB6_1: @ %atomicrmw.start
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldrex r12, [r0]
-; CHECK-NEXT: mov r3, r1
; CHECK-NEXT: cmp r12, r1
-; CHECK-NEXT: subls r3, r12, #1
-; CHECK-NEXT: cmp r12, #0
-; CHECK-NEXT: moveq r3, r1
+; CHECK-NEXT: sub r3, r12, #1
+; CHECK-NEXT: movhi r3, r1
+; CHECK-NEXT: cmp r12, #1
+; CHECK-NEXT: movlo r3, r1
; CHECK-NEXT: strex r2, r3, [r0]
; CHECK-NEXT: cmp r2, #0
; CHECK-NEXT: bne .LBB6_1
diff --git a/llvm/test/CodeGen/ARM/select_const.ll b/llvm/test/CodeGen/ARM/select_const.ll
index 180daa12e7c52..6d8a7f70754d3 100644
--- a/llvm/test/CodeGen/ARM/select_const.ll
+++ b/llvm/test/CodeGen/ARM/select_const.ll
@@ -763,46 +763,35 @@ define i64 @opaque_constant2(i1 %cond, i64 %x) {
define i64 @func(i64 %arg) {
; ARM-LABEL: func:
; ARM: @ %bb.0: @ %entry
-; ARM-NEXT: adds r0, r0, #1
-; ARM-NEXT: mov r2, #0
-; ARM-NEXT: adcs r0, r1, #0
+; ARM-NEXT: and r0, r0, r1
; ARM-NEXT: mov r1, #0
-; ARM-NEXT: adcs r0, r2, #0
-; ARM-NEXT: movne r0, #8
+; ARM-NEXT: cmn r0, #1
+; ARM-NEXT: mov r0, #0
+; ARM-NEXT: moveq r0, #8
; ARM-NEXT: mov pc, lr
;
; THUMB2-LABEL: func:
; THUMB2: @ %bb.0: @ %entry
+; THUMB2-NEXT: ands r0, r1
+; THUMB2-NEXT: movs r1, #0
; THUMB2-NEXT: adds r0, #1
-; THUMB2-NEXT: mov.w r2, #0
-; THUMB2-NEXT: adcs r0, r1, #0
-; THUMB2-NEXT: mov.w r1, #0
-; THUMB2-NEXT: adcs r0, r2, #0
-; THUMB2-NEXT: it ne
-; THUMB2-NEXT: movne r0, #8
+; THUMB2-NEXT: mov.w r0, #0
+; THUMB2-NEXT: it eq
+; THUMB2-NEXT: moveq r0, #8
; THUMB2-NEXT: bx lr
;
; THUMB-LABEL: func:
; THUMB: @ %bb.0: @ %entry
-; THUMB-NEXT: .save {r4, lr}
-; THUMB-NEXT: push {r4, lr}
-; THUMB-NEXT: movs r2, #0
-; THUMB-NEXT: adds r3, r0, #1
-; THUMB-NEXT: mov r12, r1
-; THUMB-NEXT: mov r3, r12
-; THUMB-NEXT: adcs r3, r2
-; THUMB-NEXT: mov r12, r2
-; THUMB-NEXT: mov r3, r12
-; THUMB-NEXT: adcs r3, r2
-; THUMB-NEXT: subs r4, r3, #1
+; THUMB-NEXT: ands r0, r1
+; THUMB-NEXT: movs r1, #0
; THUMB-NEXT: adds r0, r0, #1
-; THUMB-NEXT: adcs r1, r2
-; THUMB-NEXT: sbcs r3, r4
-; THUMB-NEXT: lsls r0, r3, #3
-; THUMB-NEXT: movs r1, r2
-; THUMB-NEXT: pop {r4}
-; THUMB-NEXT: pop {r2}
-; THUMB-NEXT: bx r2
+; THUMB-NEXT: beq .LBB26_2
+; THUMB-NEXT: @ %bb.1: @ %entry
+; THUMB-NEXT: movs r0, r1
+; THUMB-NEXT: bx lr
+; THUMB-NEXT: .LBB26_2:
+; THUMB-NEXT: movs r0, #8
+; THUMB-NEXT: bx lr
entry:
%0 = add i64 %arg, 1
%1 = icmp ult i64 %0, 1
diff --git a/llvm/test/CodeGen/Hexagon/loop-balign.ll b/llvm/test/CodeGen/Hexagon/loop-balign.ll
index 78285f6d1ae64..c3b27a84ac3f1 100644
--- a/llvm/test/CodeGen/Hexagon/loop-balign.ll
+++ b/llvm/test/CodeGen/Hexagon/loop-balign.ll
@@ -1,9 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc -mtriple=hexagon -O3 < %s | FileCheck %s -check-prefix=BALIGN
; BALIGN: .p2align{{.*}}5
; The test for checking the alignment of 'for.body4.for.body4_crit_edge' basic block
define dso_local void @foo(i32 %nCol, i32 %nRow, ptr nocapture %resMat) local_unnamed_addr {
+; BALIGN-LABEL: foo:
+; BALIGN: .cfi_startproc
+; BALIGN-NEXT: // %bb.0: // %entry
+; BALIGN-NEXT: {
+; BALIGN-NEXT: r5 = asl(r1,#2)
+; BALIGN-NEXT: r3 = add(r0,#-1)
+; BALIGN-NEXT: r4 = #-2
+; BALIGN-NEXT: }
+; BALIGN-NEXT: // implicit-def: $d3
+; BALIGN-NEXT: {
+; BALIGN-NEXT: p0 = cmp.gt(r3,#0)
+; BALIGN-NEXT: r3 = #0
+; BALIGN-NEXT: r8 = r5
+; BALIGN-NEXT: if (!p0.new) r0 = #1
+; BALIGN-NEXT: }
+; BALIGN-NEXT: {
+; BALIGN-NEXT: p0 = cmp.gt(r1,#0)
+; BALIGN-NEXT: jump .LBB0_1
+; BALIGN-NEXT: }
+; BALIGN-NEXT: .p2align 4
+; BALIGN-NEXT: .LBB0_8: // %for.end7
+; BALIGN-NEXT: // in Loop: Header=BB0_1 Depth=1
+; BALIGN-NEXT: {
+; BALIGN-NEXT: r3 = add(r3,#1)
+; BALIGN-NEXT: r4 = add(r4,#1)
+; BALIGN-NEXT: }
+; BALIGN-NEXT: {
+; BALIGN-NEXT: p1 = cmp.eq(r3,r0)
+; BALIGN-NEXT: if (p1.new) jumpr:nt r31
+; BALIGN-NEXT: }
+; BALIGN-NEXT: .LBB0_1: // %Outerloop
+; BALIGN-NEXT: // =>This Loop Header: Depth=1
+; BALIGN-NEXT: // Child Loop BB0_3 Depth 2
+; BALIGN-NEXT: // Child Loop BB0_6 Depth 3
+; BALIGN-NEXT: {
+; BALIGN-NEXT: if (!p0) jump:nt .LBB0_8
+; BALIGN-NEXT: }
+; BALIGN-NEXT: // %bb.2: // %for.body.lr.ph
+; BALIGN-NEXT: // in Loop: Header=BB0_1 Depth=1
+; BALIGN-NEXT: {
+; BALIGN-NEXT: loop1(.LBB0_3,r1)
+; BALIGN-NEXT: p1 = cmp.eq(r3,#0)
+; BALIGN-NEXT: p2 = cmp.eq(r3,#1)
+; BALIGN-NEXT: jump .LBB0_3
+; BALIGN-NEXT: }
+; BALIGN-NEXT: .p2align 4
+; BALIGN-NEXT: .LBB0_7: // %for.end
+; BALIGN-NEXT: // in Loop: Header=BB0_3 Depth=2
+; BALIGN-NEXT: {
+; BALIGN-NEXT: r9 = clb(r7:6)
+; BALIGN-NEXT: memw(r2+#0) = r9.new
+; BALIGN-NEXT: }
+; BALIGN-NEXT: {
+; BALIGN-NEXT: nop
+; BALIGN-NEXT: nop
+; BALIGN-NEXT: nop
+; BALIGN-NEXT: } :endloop1
+; BALIGN-NEXT: {
+; BALIGN-NEXT: jump .LBB0_8
+; BALIGN-NEXT: }
+; BALIGN-NEXT: .LBB0_3: // Block address taken
+; BALIGN-NEXT: // %for.body
+; BALIGN-NEXT: // Parent Loop BB0_1 Depth=1
+; BALIGN-NEXT: // => This Loop Header: Depth=2
+; BALIGN-NEXT: // Child Loop BB0_6 Depth 3
+; BALIGN-NEXT: {
+; BALIGN-NEXT: r12 = r8
+; BALIGN-NEXT: r8 = add(r8,r5)
+; BALIGN-NEXT: if (p1) jump:nt .LBB0_7
+; BALIGN-NEXT: }
+; BALIGN-NEXT: // %bb.4: // %for.body4.peel
+; BALIGN-NEXT: // in Loop: Header=BB0_3 Depth=2
+; BALIGN-NEXT: {
+; BALIGN-NEXT: r12 = memw(r12+#0)
+; BALIGN-NEXT: }
+; BALIGN-NEXT: {
+; BALIGN-NEXT: r7:6 -= mpy(r12,r9)
+; BALIGN-NEXT: if (p2) jump:nt .LBB0_7
+; BALIGN-NEXT: }
+; BALIGN-NEXT: // %bb.5: // %for.body4.preheader.peel.newph
+; BALIGN-NEXT: // in Loop: Header=BB0_3 Depth=2
+; BALIGN-NEXT: {
+; BALIGN-NEXT: r13 = add(r4,#1)
+; BALIGN-NEXT: r12 = memw(r8+#0)
+; BALIGN-NEXT: }
+; BALIGN-NEXT: {
+; BALIGN-NEXT: loop0(.LBB0_6,r13)
+; BALIGN-NEXT: }
+; BALIGN-NEXT: .p2align 4
+; BALIGN-NEXT: .LBB0_6: // Block address taken
+; BALIGN-NEXT: // %for.body4
+; BALIGN-NEXT: // Parent Loop BB0_1 Depth=1
+; BALIGN-NEXT: // Parent Loop BB0_3 Depth=2
+; BALIGN-NEXT: // => This Inner Loop Header: Depth=3
+; BALIGN-NEXT: {
+; BALIGN-NEXT: r7:6 -= mpy(r12,r9)
+; BALIGN-NEXT: nop
+; BALIGN-NEXT: } :endloop0
+; BALIGN-NEXT: {
+; BALIGN-NEXT: jump .LBB0_7
+; BALIGN-NEXT: }
entry:
%shl = shl i32 %nRow, 2
%cmp36 = icmp sgt i32 %nRow, 0
@@ -85,7 +187,7 @@ if.end: ; preds = %for.end7
}
; Function Attrs: nounwind readnone
-declare i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64, i32, i32)
+declare i64 @llvm.hexagon.M2.dpmpyss.nac.s0(i64, i32, i32)
; Function Attrs: nounwind readnone
declare i32 @llvm.hexagon.S2.clbp(i64)
diff --git a/llvm/test/CodeGen/NVPTX/i128.ll b/llvm/test/CodeGen/NVPTX/i128.ll
index cdbbabe3e3b05..25aff73a38b82 100644
--- a/llvm/test/CodeGen/NVPTX/i128.ll
+++ b/llvm/test/CodeGen/NVPTX/i128.ll
@@ -61,21 +61,21 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: // %bb.3: // %udiv-bb1
; CHECK-NEXT: add.cc.s64 %rd71, %rd26, 1;
; CHECK-NEXT: addc.cc.s64 %rd72, %rd27, 0;
-; CHECK-NEXT: or.b64 %rd30, %rd71, %rd72;
-; CHECK-NEXT: setp.eq.b64 %p16, %rd30, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd26;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
-; CHECK-NEXT: shl.b64 %rd31, %rd3, %r6;
+; CHECK-NEXT: shl.b64 %rd30, %rd3, %r6;
; CHECK-NEXT: sub.s32 %r7, 64, %r6;
-; CHECK-NEXT: shr.u64 %rd32, %rd2, %r7;
-; CHECK-NEXT: or.b64 %rd33, %rd31, %rd32;
+; CHECK-NEXT: shr.u64 %rd31, %rd2, %r7;
+; CHECK-NEXT: or.b64 %rd32, %rd30, %rd31;
; CHECK-NEXT: sub.s32 %r8, 63, %r5;
-; CHECK-NEXT: shl.b64 %rd34, %rd2, %r8;
-; CHECK-NEXT: setp.gt.s32 %p17, %r6, 63;
-; CHECK-NEXT: selp.b64 %rd76, %rd34, %rd33, %p17;
+; CHECK-NEXT: shl.b64 %rd33, %rd2, %r8;
+; CHECK-NEXT: setp.gt.s32 %p16, %r6, 63;
+; CHECK-NEXT: selp.b64 %rd76, %rd33, %rd32, %p16;
; CHECK-NEXT: shl.b64 %rd75, %rd2, %r6;
+; CHECK-NEXT: or.b64 %rd34, %rd71, %rd72;
+; CHECK-NEXT: setp.eq.b64 %p17, %rd34, 0;
; CHECK-NEXT: mov.b64 %rd69, %rd70;
-; CHECK-NEXT: @%p16 bra $L__BB0_4;
+; CHECK-NEXT: @%p17 bra $L__BB0_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r9, %rd71;
; CHECK-NEXT: shr.u64 %rd35, %rd2, %r9;
@@ -191,21 +191,21 @@ define i128 @urem_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: // %bb.3: // %udiv-bb1
; CHECK-NEXT: add.cc.s64 %rd58, %rd17, 1;
; CHECK-NEXT: addc.cc.s64 %rd59, %rd18, 0;
-; CHECK-NEXT: or.b64 %rd21, %rd58, %rd59;
-; CHECK-NEXT: setp.eq.b64 %p14, %rd21, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd17;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
-; CHECK-NEXT: shl.b64 %rd22, %rd6, %r6;
+; CHECK-NEXT: shl.b64 %rd21, %rd6, %r6;
; CHECK-NEXT: sub.s32 %r7, 64, %r6;
-; CHECK-NEXT: shr.u64 %rd23, %rd5, %r7;
-; CHECK-NEXT: or.b64 %rd24, %rd22, %rd23;
+; CHECK-NEXT: shr.u64 %rd22, %rd5, %r7;
+; CHECK-NEXT: or.b64 %rd23, %rd21, %rd22;
; CHECK-NEXT: sub.s32 %r8, 63, %r5;
-; CHECK-NEXT: shl.b64 %rd25, %rd5, %r8;
-; CHECK-NEXT: setp.gt.s32 %p15, %r6, 63;
-; CHECK-NEXT: selp.b64 %rd63, %rd25, %rd24, %p15;
+; CHECK-NEXT: shl.b64 %rd24, %rd5, %r8;
+; CHECK-NEXT: setp.gt.s32 %p14, %r6, 63;
+; CHECK-NEXT: selp.b64 %rd63, %rd24, %rd23, %p14;
; CHECK-NEXT: shl.b64 %rd62, %rd5, %r6;
+; CHECK-NEXT: or.b64 %rd25, %rd58, %rd59;
+; CHECK-NEXT: setp.eq.b64 %p15, %rd25, 0;
; CHECK-NEXT: mov.b64 %rd56, %rd57;
-; CHECK-NEXT: @%p14 bra $L__BB1_4;
+; CHECK-NEXT: @%p15 bra $L__BB1_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r9, %rd58;
; CHECK-NEXT: shr.u64 %rd26, %rd5, %r9;
@@ -363,21 +363,21 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: // %bb.3: // %udiv-bb1
; CHECK-NEXT: add.cc.s64 %rd66, %rd27, 1;
; CHECK-NEXT: addc.cc.s64 %rd67, %rd28, 0;
-; CHECK-NEXT: or.b64 %rd31, %rd66, %rd67;
-; CHECK-NEXT: setp.eq.b64 %p16, %rd31, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd27;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
-; CHECK-NEXT: shl.b64 %rd32, %rd2, %r6;
+; CHECK-NEXT: shl.b64 %rd31, %rd2, %r6;
; CHECK-NEXT: sub.s32 %r7, 64, %r6;
-; CHECK-NEXT: shr.u64 %rd33, %rd1, %r7;
-; CHECK-NEXT: or.b64 %rd34, %rd32, %rd33;
+; CHECK-NEXT: shr.u64 %rd32, %rd1, %r7;
+; CHECK-NEXT: or.b64 %rd33, %rd31, %rd32;
; CHECK-NEXT: sub.s32 %r8, 63, %r5;
-; CHECK-NEXT: shl.b64 %rd35, %rd1, %r8;
-; CHECK-NEXT: setp.gt.s32 %p17, %r6, 63;
-; CHECK-NEXT: selp.b64 %rd71, %rd35, %rd34, %p17;
+; CHECK-NEXT: shl.b64 %rd34, %rd1, %r8;
+; CHECK-NEXT: setp.gt.s32 %p16, %r6, 63;
+; CHECK-NEXT: selp.b64 %rd71, %rd34, %rd33, %p16;
; CHECK-NEXT: shl.b64 %rd70, %rd1, %r6;
+; CHECK-NEXT: or.b64 %rd35, %rd66, %rd67;
+; CHECK-NEXT: setp.eq.b64 %p17, %rd35, 0;
; CHECK-NEXT: mov.b64 %rd64, %rd65;
-; CHECK-NEXT: @%p16 bra $L__BB4_4;
+; CHECK-NEXT: @%p17 bra $L__BB4_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r9, %rd66;
; CHECK-NEXT: shr.u64 %rd36, %rd1, %r9;
@@ -487,21 +487,21 @@ define i128 @udiv_i128(i128 %lhs, i128 %rhs) {
; CHECK-NEXT: // %bb.3: // %udiv-bb1
; CHECK-NEXT: add.cc.s64 %rd52, %rd17, 1;
; CHECK-NEXT: addc.cc.s64 %rd53, %rd18, 0;
-; CHECK-NEXT: or.b64 %rd21, %rd52, %rd53;
-; CHECK-NEXT: setp.eq.b64 %p14, %rd21, 0;
; CHECK-NEXT: cvt.u32.u64 %r5, %rd17;
; CHECK-NEXT: sub.s32 %r6, 127, %r5;
-; CHECK-NEXT: shl.b64 %rd22, %rd4, %r6;
+; CHECK-NEXT: shl.b64 %rd21, %rd4, %r6;
; CHECK-NEXT: sub.s32 %r7, 64, %r6;
-; CHECK-NEXT: shr.u64 %rd23, %rd3, %r7;
-; CHECK-NEXT: or.b64 %rd24, %rd22, %rd23;
+; CHECK-NEXT: shr.u64 %rd22, %rd3, %r7;
+; CHECK-NEXT: or.b64 %rd23, %rd21, %rd22;
; CHECK-NEXT: sub.s32 %r8, 63, %r5;
-; CHECK-NEXT: shl.b64 %rd25, %rd3, %r8;
-; CHECK-NEXT: setp.gt.s32 %p15, %r6, 63;
-; CHECK-NEXT: selp.b64 %rd57, %rd25, %rd24, %p15;
+; CHECK-NEXT: shl.b64 %rd24, %rd3, %r8;
+; CHECK-NEXT: setp.gt.s32 %p14, %r6, 63;
+; CHECK-NEXT: selp.b64 %rd57, %rd24, %rd23, %p14;
; CHECK-NEXT: shl.b64 %rd56, %rd3, %r6;
+; CHECK-NEXT: or.b64 %rd25, %rd52, %rd53;
+; CHECK-NEXT: setp.eq.b64 %p15, %rd25, 0;
; CHECK-NEXT: mov.b64 %rd50, %rd51;
-; CHECK-NEXT: @%p14 bra $L__BB5_4;
+; CHECK-NEXT: @%p15 bra $L__BB5_4;
; CHECK-NEXT: // %bb.1: // %udiv-preheader
; CHECK-NEXT: cvt.u32.u64 %r9, %rd52;
; CHECK-NEXT: shr.u64 %rd26, %rd3, %r9;
diff --git a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
index 4dc6d0ad3d5c7..05fe11026cc59 100644
--- a/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/PowerPC/atomicrmw-uinc-udec-wrap.ll
@@ -370,42 +370,42 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i64:
; CHECK: # %bb.0:
; CHECK-NEXT: sync
-; CHECK-NEXT: ld 6, 0(3)
+; CHECK-NEXT: ld 8, 0(3)
+; CHECK-NEXT: li 6, 1
+; CHECK-NEXT: li 7, 0
; CHECK-NEXT: .LBB7_1: # %atomicrmw.start
; CHECK-NEXT: # =>This Loop Header: Depth=1
-; CHECK-NEXT: # Child Loop BB7_4 Depth 2
-; CHECK-NEXT: cmpdi 6, 0
-; CHECK-NEXT: mr 7, 4
-; CHECK-NEXT: bc 12, 2, .LBB7_4
+; CHECK-NEXT: # Child Loop BB7_3 Depth 2
+; CHECK-NEXT: subc 5, 8, 6
+; CHECK-NEXT: addze. 9, 7
+; CHECK-NEXT: cmpld 1, 8, 4
+; CHECK-NEXT: cror 20, 2, 5
+; CHECK-NEXT: mr 9, 4
+; CHECK-NEXT: bc 12, 20, .LBB7_3
; CHECK-NEXT: # %bb.2: # %atomicrmw.start
; CHECK-NEXT: #
-; CHECK-NEXT: cmpld 6, 4
-; CHECK-NEXT: mr 7, 4
-; CHECK-NEXT: bc 12, 1, .LBB7_4
-; CHECK-NEXT: # %bb.3: # %atomicrmw.start
-; CHECK-NEXT: #
-; CHECK-NEXT: addi 7, 6, -1
-; CHECK-NEXT: .LBB7_4: # %cmpxchg.start
+; CHECK-NEXT: mr 9, 5
+; CHECK-NEXT: .LBB7_3: # %cmpxchg.start
; CHECK-NEXT: # Parent Loop BB7_1 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
; CHECK-NEXT: ldarx 5, 0, 3
-; CHECK-NEXT: cmpld 5, 6
-; CHECK-NEXT: bne- 0, .LBB7_7
-; CHECK-NEXT: # %bb.5: # %cmpxchg.fencedstore
+; CHECK-NEXT: cmpld 5, 8
+; CHECK-NEXT: bne- 0, .LBB7_6
+; CHECK-NEXT: # %bb.4: # %cmpxchg.fencedstore
; CHECK-NEXT: #
-; CHECK-NEXT: stdcx. 7, 0, 3
+; CHECK-NEXT: stdcx. 9, 0, 3
; CHECK-NEXT: creqv 20, 20, 20
-; CHECK-NEXT: bne- 0, .LBB7_4
-; CHECK-NEXT: # %bb.6: # %cmpxchg.end
+; CHECK-NEXT: bne- 0, .LBB7_3
+; CHECK-NEXT: # %bb.5: # %cmpxchg.end
; CHECK-NEXT: #
-; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: mr 8, 5
; CHECK-NEXT: bc 4, 20, .LBB7_1
-; CHECK-NEXT: b .LBB7_8
-; CHECK-NEXT: .LBB7_7: # %cmpxchg.nostore
+; CHECK-NEXT: b .LBB7_7
+; CHECK-NEXT: .LBB7_6: # %cmpxchg.nostore
; CHECK-NEXT: #
-; CHECK-NEXT: mr 6, 5
+; CHECK-NEXT: mr 8, 5
; CHECK-NEXT: b .LBB7_1
-; CHECK-NEXT: .LBB7_8: # %atomicrmw.end
+; CHECK-NEXT: .LBB7_7: # %atomicrmw.end
; CHECK-NEXT: mr 3, 5
; CHECK-NEXT: lwsync
; CHECK-NEXT: blr
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index 34b703a981105..fc608f9f6410b 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -24,11 +24,12 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 3, 3, 24
+; CHECK-NEXT: clrlwi 4, 3, 24
; CHECK-NEXT: addi 3, 3, 42
-; CHECK-NEXT: andi. 4, 3, 256
+; CHECK-NEXT: clrlwi 5, 3, 24
+; CHECK-NEXT: cmplw 4, 5
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iseleq 3, 3, 4
+; CHECK-NEXT: iselgt 3, 4, 3
; CHECK-NEXT: blr
%a = add i8 %x, 42
%c = icmp ugt i8 %x, %a
@@ -69,11 +70,12 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 3, 3, 16
+; CHECK-NEXT: clrlwi 4, 3, 16
; CHECK-NEXT: addi 3, 3, 42
-; CHECK-NEXT: andis. 4, 3, 1
+; CHECK-NEXT: clrlwi 5, 3, 16
+; CHECK-NEXT: cmplw 4, 5
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iseleq 3, 3, 4
+; CHECK-NEXT: iselgt 3, 4, 3
; CHECK-NEXT: blr
%a = add i16 %x, 42
%c = icmp ugt i16 %x, %a
@@ -114,9 +116,9 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; CHECK: # %bb.0:
; CHECK-NEXT: addi 4, 3, 42
-; CHECK-NEXT: cmplw 4, 3
+; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: isellt 3, 3, 4
+; CHECK-NEXT: iselgt 3, 3, 4
; CHECK-NEXT: blr
%a = add i32 %x, 42
%c = icmp ugt i32 %x, %a
@@ -203,12 +205,12 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 4, 4, 24
-; CHECK-NEXT: clrlwi 3, 3, 24
+; CHECK-NEXT: clrlwi 5, 3, 24
; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: andi. 4, 3, 256
+; CHECK-NEXT: clrlwi 4, 3, 24
+; CHECK-NEXT: cmplw 5, 4
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iseleq 3, 3, 4
+; CHECK-NEXT: iselgt 3, 4, 3
; CHECK-NEXT: blr
%a = add i8 %x, %y
%c = icmp ugt i8 %x, %a
@@ -254,12 +256,12 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 4, 4, 16
-; CHECK-NEXT: clrlwi 3, 3, 16
+; CHECK-NEXT: clrlwi 5, 3, 16
; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: andis. 4, 3, 1
+; CHECK-NEXT: clrlwi 4, 3, 16
+; CHECK-NEXT: cmplw 5, 4
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iseleq 3, 3, 4
+; CHECK-NEXT: iselgt 3, 4, 3
; CHECK-NEXT: blr
%a = add i16 %x, %y
%c = icmp ugt i16 %x, %a
@@ -304,9 +306,9 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; CHECK: # %bb.0:
; CHECK-NEXT: add 4, 3, 4
-; CHECK-NEXT: cmplw 4, 3
+; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: isellt 3, 3, 4
+; CHECK-NEXT: iselgt 3, 3, 4
; CHECK-NEXT: blr
%a = add i32 %x, %y
%c = icmp ugt i32 %x, %a
diff --git a/llvm/test/CodeGen/RISCV/branch-on-zero.ll b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
index 02aeebdeb3775..b1d396d70ff5f 100644
--- a/llvm/test/CodeGen/RISCV/branch-on-zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
@@ -8,19 +8,21 @@ define i32 @optbranch_32(i32 %Arg) {
; RV32-LABEL: optbranch_32:
; RV32: # %bb.0: # %bb
; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: bnez a0, .LBB0_2
-; RV32-NEXT: # %bb.1: # %bb2
+; RV32-NEXT: beqz a0, .LBB0_2
+; RV32-NEXT: # %bb.1: # %bb3
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB0_2: # %bb2
; RV32-NEXT: li a0, -1
-; RV32-NEXT: .LBB0_2: # %bb3
; RV32-NEXT: ret
;
; RV64-LABEL: optbranch_32:
; RV64: # %bb.0: # %bb
; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: bnez a0, .LBB0_2
-; RV64-NEXT: # %bb.1: # %bb2
+; RV64-NEXT: beqz a0, .LBB0_2
+; RV64-NEXT: # %bb.1: # %bb3
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB0_2: # %bb2
; RV64-NEXT: li a0, -1
-; RV64-NEXT: .LBB0_2: # %bb3
; RV64-NEXT: ret
bb:
%i1 = icmp eq i32 %Arg, -1
@@ -41,20 +43,22 @@ define i64 @optbranch_64(i64 %Arg) {
; RV32-NEXT: seqz a2, a0
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: or a2, a0, a1
-; RV32-NEXT: bnez a2, .LBB1_2
-; RV32-NEXT: # %bb.1: # %bb2
+; RV32-NEXT: beqz a2, .LBB1_2
+; RV32-NEXT: # %bb.1: # %bb3
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB1_2: # %bb2
; RV32-NEXT: li a0, -1
; RV32-NEXT: li a1, -1
-; RV32-NEXT: .LBB1_2: # %bb3
; RV32-NEXT: ret
;
; RV64-LABEL: optbranch_64:
; RV64: # %bb.0: # %bb
; RV64-NEXT: addi a0, a0, 1
-; RV64-NEXT: bnez a0, .LBB1_2
-; RV64-NEXT: # %bb.1: # %bb2
+; RV64-NEXT: beqz a0, .LBB1_2
+; RV64-NEXT: # %bb.1: # %bb3
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB1_2: # %bb2
; RV64-NEXT: li a0, -1
-; RV64-NEXT: .LBB1_2: # %bb3
; RV64-NEXT: ret
bb:
%i1 = icmp eq i64 %Arg, -1
diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
index ba6769b2aa3e1..148886224454c 100644
--- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
+++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll
@@ -48,33 +48,36 @@ define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-LABEL: uaddo1_math_overflow_used:
; RV32: # %bb.0:
-; RV32-NEXT: add a5, a3, a1
-; RV32-NEXT: add a0, a2, a0
-; RV32-NEXT: sltu a1, a0, a2
-; RV32-NEXT: add a5, a5, a1
-; RV32-NEXT: beq a5, a3, .LBB1_2
+; RV32-NEXT: add a6, a3, a1
+; RV32-NEXT: add a5, a2, a0
+; RV32-NEXT: sltu a7, a5, a2
+; RV32-NEXT: add a6, a6, a7
+; RV32-NEXT: beq a6, a1, .LBB1_2
; RV32-NEXT: # %bb.1:
-; RV32-NEXT: sltu a1, a5, a3
+; RV32-NEXT: sltu a0, a6, a1
+; RV32-NEXT: beqz a0, .LBB1_3
+; RV32-NEXT: j .LBB1_4
; RV32-NEXT: .LBB1_2:
-; RV32-NEXT: bnez a1, .LBB1_4
-; RV32-NEXT: # %bb.3:
+; RV32-NEXT: sltu a0, a5, a0
+; RV32-NEXT: bnez a0, .LBB1_4
+; RV32-NEXT: .LBB1_3:
; RV32-NEXT: li a2, 42
; RV32-NEXT: .LBB1_4:
-; RV32-NEXT: neg a1, a1
+; RV32-NEXT: neg a1, a0
; RV32-NEXT: and a1, a1, a3
-; RV32-NEXT: sw a0, 0(a4)
-; RV32-NEXT: sw a5, 4(a4)
+; RV32-NEXT: sw a5, 0(a4)
+; RV32-NEXT: sw a6, 4(a4)
; RV32-NEXT: mv a0, a2
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo1_math_overflow_used:
; RV64: # %bb.0:
-; RV64-NEXT: add a0, a1, a0
-; RV64-NEXT: bltu a0, a1, .LBB1_2
+; RV64-NEXT: add a3, a1, a0
+; RV64-NEXT: bltu a3, a0, .LBB1_2
; RV64-NEXT: # %bb.1:
; RV64-NEXT: li a1, 42
; RV64-NEXT: .LBB1_2:
-; RV64-NEXT: sd a0, 0(a2)
+; RV64-NEXT: sd a3, 0(a2)
; RV64-NEXT: mv a0, a1
; RV64-NEXT: ret
%add = add i64 %b, %a
@@ -200,7 +203,7 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; RV32-NEXT: add a0, a2, a0
; RV32-NEXT: sltu a1, a0, a2
; RV32-NEXT: add a5, a5, a1
-; RV32-NEXT: beq a5, a3, .LBB5_2
+; RV32-NEXT: beq a3, a5, .LBB5_2
; RV32-NEXT: # %bb.1:
; RV32-NEXT: sltu a1, a5, a3
; RV32-NEXT: .LBB5_2:
@@ -617,9 +620,10 @@ define i1 @uaddo_i64_increment_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt:
; RV32: # %bb.0:
; RV32-NEXT: addi a3, a0, 1
-; RV32-NEXT: seqz a0, a3
-; RV32-NEXT: add a1, a1, a0
-; RV32-NEXT: or a0, a3, a1
+; RV32-NEXT: and a0, a0, a1
+; RV32-NEXT: seqz a4, a3
+; RV32-NEXT: addi a0, a0, 1
+; RV32-NEXT: add a1, a1, a4
; RV32-NEXT: seqz a0, a0
; RV32-NEXT: sw a3, 0(a2)
; RV32-NEXT: sw a1, 4(a2)
@@ -642,12 +646,13 @@ define i1 @uaddo_i64_increment_alt(i64 %x, ptr %p) {
define i1 @uaddo_i64_increment_alt_dom(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_increment_alt_dom:
; RV32: # %bb.0:
-; RV32-NEXT: addi a3, a0, 1
+; RV32-NEXT: and a3, a0, a1
+; RV32-NEXT: addi a4, a0, 1
+; RV32-NEXT: addi a3, a3, 1
+; RV32-NEXT: seqz a5, a4
; RV32-NEXT: seqz a0, a3
-; RV32-NEXT: add a1, a1, a0
-; RV32-NEXT: or a0, a3, a1
-; RV32-NEXT: seqz a0, a0
-; RV32-NEXT: sw a3, 0(a2)
+; RV32-NEXT: add a1, a1, a5
+; RV32-NEXT: sw a4, 0(a2)
; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: ret
;
@@ -668,18 +673,16 @@ define i1 @uaddo_i64_increment_alt_dom(i64 %x, ptr %p) {
define i1 @uaddo_i32_decrement_alt(i32 signext %x, ptr %p) {
; RV32-LABEL: uaddo_i32_decrement_alt:
; RV32: # %bb.0:
-; RV32-NEXT: snez a2, a0
-; RV32-NEXT: addi a0, a0, -1
-; RV32-NEXT: sw a0, 0(a1)
-; RV32-NEXT: mv a0, a2
+; RV32-NEXT: addi a2, a0, -1
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: sw a2, 0(a1)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i32_decrement_alt:
; RV64: # %bb.0:
-; RV64-NEXT: snez a2, a0
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: sw a0, 0(a1)
-; RV64-NEXT: mv a0, a2
+; RV64-NEXT: addi a2, a0, -1
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: sw a2, 0(a1)
; RV64-NEXT: ret
%a = add i32 %x, -1
store i32 %a, ptr %p
@@ -690,21 +693,20 @@ define i1 @uaddo_i32_decrement_alt(i32 signext %x, ptr %p) {
define i1 @uaddo_i64_decrement_alt(i64 %x, ptr %p) {
; RV32-LABEL: uaddo_i64_decrement_alt:
; RV32: # %bb.0:
-; RV32-NEXT: or a3, a0, a1
-; RV32-NEXT: seqz a4, a0
-; RV32-NEXT: addi a5, a0, -1
-; RV32-NEXT: snez a0, a3
-; RV32-NEXT: sub a1, a1, a4
-; RV32-NEXT: sw a5, 0(a2)
+; RV32-NEXT: seqz a3, a0
+; RV32-NEXT: addi a4, a0, -1
+; RV32-NEXT: or a0, a0, a1
+; RV32-NEXT: sub a1, a1, a3
+; RV32-NEXT: snez a0, a0
+; RV32-NEXT: sw a4, 0(a2)
; RV32-NEXT: sw a1, 4(a2)
; RV32-NEXT: ret
;
; RV64-LABEL: uaddo_i64_decrement_alt:
; RV64: # %bb.0:
-; RV64-NEXT: snez a2, a0
-; RV64-NEXT: addi a0, a0, -1
-; RV64-NEXT: sd a0, 0(a1)
-; RV64-NEXT: mv a0, a2
+; RV64-NEXT: addi a2, a0, -1
+; RV64-NEXT: snez a0, a0
+; RV64-NEXT: sd a2, 0(a1)
; RV64-NEXT: ret
%a = add i64 %x, -1
store i64 %a, ptr %p
diff --git a/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll b/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll
index 2db620dab8801..0b389e3a26c78 100644
--- a/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll
+++ b/llvm/test/CodeGen/SPIRV/optimizations/add-check-overflow.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; This test aims to check ability to support "Arithmetic with Overflow" intrinsics
; in the special case when those intrinsics are being generated by the CodeGenPrepare;
; pass during translations with optimization (note -disable-lsr, to inhibit
@@ -89,3 +90,6 @@ l1:
exit:
ret i32 %i
}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
+; NOLSR: {{.*}}
diff --git a/llvm/test/CodeGen/Thumb/scheduler-clone-cpsr-def.ll b/llvm/test/CodeGen/Thumb/scheduler-clone-cpsr-def.ll
index 31e54c43c1e5f..4c92a00020475 100644
--- a/llvm/test/CodeGen/Thumb/scheduler-clone-cpsr-def.ll
+++ b/llvm/test/CodeGen/Thumb/scheduler-clone-cpsr-def.ll
@@ -11,27 +11,20 @@
define i64 @f(i64 %x2, i32 %z) {
; CHECK-LABEL: f:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, r5, r7, lr}
-; CHECK-NEXT: push {r4, r5, r7, lr}
-; CHECK-NEXT: movs r2, #0
-; CHECK-NEXT: subs r3, r0, #1
-; CHECK-NEXT: mov r3, r1
-; CHECK-NEXT: sbcs r3, r2
-; CHECK-NEXT: mov r3, r2
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: mov r2, r0
+; CHECK-NEXT: orrs r2, r1
+; CHECK-NEXT: rsbs r3, r2, #0
; CHECK-NEXT: adcs r3, r2
-; CHECK-NEXT: movs r4, #30
-; CHECK-NEXT: subs r5, r0, #1
-; CHECK-NEXT: mov r5, r1
-; CHECK-NEXT: sbcs r5, r2
-; CHECK-NEXT: adcs r4, r2
-; CHECK-NEXT: lsls r2, r1, #1
-; CHECK-NEXT: lsls r2, r4
-; CHECK-NEXT: movs r4, #1
-; CHECK-NEXT: eors r4, r3
-; CHECK-NEXT: lsrs r0, r4
-; CHECK-NEXT: orrs r0, r2
-; CHECK-NEXT: lsrs r1, r4
-; CHECK-NEXT: pop {r4, r5, r7, pc}
+; CHECK-NEXT: lsrs r0, r3
+; CHECK-NEXT: movs r2, #31
+; CHECK-NEXT: eors r2, r3
+; CHECK-NEXT: lsls r4, r1, #1
+; CHECK-NEXT: lsls r4, r2
+; CHECK-NEXT: orrs r0, r4
+; CHECK-NEXT: lsrs r1, r3
+; CHECK-NEXT: pop {r4, pc}
%x3 = add nsw i64 %x2, -1
%x8 = icmp ne i64 %x2, 0
%x9 = xor i1 %x8, true
diff --git a/llvm/test/CodeGen/X86/abdu-neg.ll b/llvm/test/CodeGen/X86/abdu-neg.ll
index b7c34070f1af6..7309f1902bff6 100644
--- a/llvm/test/CodeGen/X86/abdu-neg.ll
+++ b/llvm/test/CodeGen/X86/abdu-neg.ll
@@ -804,21 +804,24 @@ define i32 @abd_cmp_i32(i32 %a, i32 %b) nounwind {
define i64 @abd_cmp_i64(i64 %a, i64 %b) nounwind {
; X86-LABEL: abd_cmp_i64:
; X86: # %bb.0:
+; X86-NEXT: pushl %ebx
+; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: subl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: xorl %esi, %ecx
-; X86-NEXT: xorl %esi, %eax
-; X86-NEXT: subl %esi, %eax
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: negl %eax
-; X86-NEXT: sbbl %ecx, %edx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: subl %eax, %edi
+; X86-NEXT: movl %esi, %ebx
+; X86-NEXT: sbbl %edx, %ebx
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: cmovael %edi, %eax
+; X86-NEXT: cmovael %ebx, %edx
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: abd_cmp_i64:
@@ -845,36 +848,34 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $16, %esp
-; X86-NEXT: movl 32(%ebp), %esi
-; X86-NEXT: movl 36(%ebp), %eax
; X86-NEXT: movl 24(%ebp), %ecx
-; X86-NEXT: movl 28(%ebp), %edi
-; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl 40(%ebp), %eax
+; X86-NEXT: movl 44(%ebp), %esi
+; X86-NEXT: subl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: sbbl %edx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 32(%ebp), %esi
+; X86-NEXT: movl 48(%ebp), %edi
+; X86-NEXT: sbbl %esi, %edi
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl 52(%ebp), %eax
+; X86-NEXT: sbbl %ebx, %eax
; X86-NEXT: subl 40(%ebp), %ecx
-; X86-NEXT: sbbl 44(%ebp), %edi
+; X86-NEXT: sbbl 44(%ebp), %edx
; X86-NEXT: sbbl 48(%ebp), %esi
-; X86-NEXT: sbbl 52(%ebp), %eax
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %ebx, %ebx
-; X86-NEXT: xorl %ebx, %eax
-; X86-NEXT: xorl %ebx, %esi
-; X86-NEXT: xorl %ebx, %edi
-; X86-NEXT: xorl %ebx, %ecx
-; X86-NEXT: subl %ebx, %ecx
-; X86-NEXT: sbbl %ebx, %edi
-; X86-NEXT: sbbl %ebx, %esi
-; X86-NEXT: sbbl %ebx, %eax
-; X86-NEXT: negl %ecx
-; X86-NEXT: movl $0, %ebx
-; X86-NEXT: sbbl %edi, %ebx
-; X86-NEXT: movl $0, %edi
-; X86-NEXT: sbbl %esi, %edi
-; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: sbbl 52(%ebp), %ebx
+; X86-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: cmovael {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: cmovael %edi, %esi
+; X86-NEXT: cmovael %eax, %ebx
; X86-NEXT: movl 8(%ebp), %eax
+; X86-NEXT: movl %ebx, 12(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 4(%eax)
; X86-NEXT: movl %ecx, (%eax)
-; X86-NEXT: movl %ebx, 4(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %edx, 12(%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
@@ -884,19 +885,15 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind {
;
; X64-LABEL: abd_cmp_i128:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: subq %rdx, %rax
-; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: movl $0, %ecx
-; X64-NEXT: sbbq %rcx, %rcx
-; X64-NEXT: xorq %rcx, %rsi
-; X64-NEXT: xorq %rcx, %rax
-; X64-NEXT: subq %rcx, %rax
+; X64-NEXT: movq %rdx, %rax
+; X64-NEXT: subq %rdi, %rax
+; X64-NEXT: movq %rcx, %r8
+; X64-NEXT: sbbq %rsi, %r8
+; X64-NEXT: subq %rdx, %rdi
; X64-NEXT: sbbq %rcx, %rsi
-; X64-NEXT: negq %rax
-; X64-NEXT: sbbq %rsi, %rdi
-; X64-NEXT: movq %rdi, %rdx
+; X64-NEXT: cmovbq %rdi, %rax
+; X64-NEXT: cmovbq %rsi, %r8
+; X64-NEXT: movq %r8, %rdx
; X64-NEXT: retq
%cmp = icmp ult i128 %a, %b
%ab = sub i128 %a, %b
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
index 455b72d16a075..7236b2c3eec5d 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-signed.ll
@@ -152,14 +152,14 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $176, %esp
-; X86-NEXT: movl 32(%ebp), %edx
-; X86-NEXT: movl 36(%ebp), %ecx
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: movl 36(%ebp), %edx
+; X86-NEXT: movl %edx, %eax
; X86-NEXT: sarl $31, %eax
-; X86-NEXT: xorl %eax, %ecx
-; X86-NEXT: movl %ecx, %edi
; X86-NEXT: xorl %eax, %edx
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: xorl %eax, %ecx
+; X86-NEXT: movl %ecx, %esi
; X86-NEXT: movl 28(%ebp), %edx
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: movl 24(%ebp), %ecx
@@ -172,26 +172,27 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: sbbl %eax, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 52(%ebp), %esi
-; X86-NEXT: movl %esi, %edx
+; X86-NEXT: movl 52(%ebp), %ecx
+; X86-NEXT: movl %ecx, %edx
; X86-NEXT: sarl $31, %edx
-; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: xorl %edx, %edi
; X86-NEXT: movl 48(%ebp), %ecx
; X86-NEXT: xorl %edx, %ecx
; X86-NEXT: movl 44(%ebp), %ebx
; X86-NEXT: xorl %edx, %ebx
-; X86-NEXT: movl 40(%ebp), %edi
-; X86-NEXT: xorl %edx, %edi
-; X86-NEXT: subl %edx, %edi
+; X86-NEXT: movl 40(%ebp), %esi
+; X86-NEXT: xorl %edx, %esi
+; X86-NEXT: subl %edx, %esi
; X86-NEXT: sbbl %edx, %ebx
; X86-NEXT: sbbl %edx, %ecx
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: sbbl %edx, %edi
; X86-NEXT: xorl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ebx, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: movl %edi, %ecx
+; X86-NEXT: orl %edi, %eax
+; X86-NEXT: movl %esi, %ecx
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: sete %cl
@@ -203,92 +204,99 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: sete %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: bsrl %esi, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: bsrl %eax, %ecx
+; X86-NEXT: bsrl %edi, %ecx
; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: orl $32, %ecx
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: bsrl %ebx, %edx
+; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: bsrl %edi, %edi
-; X86-NEXT: xorl $31, %edi
-; X86-NEXT: orl $32, %edi
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: cmovnel %edx, %edi
-; X86-NEXT: orl $64, %edi
-; X86-NEXT: movl %eax, %edx
+; X86-NEXT: orl $32, %edx
+; X86-NEXT: testl %edi, %edi
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: bsrl %ebx, %eax
+; X86-NEXT: xorl $31, %eax
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %esi, %edx
-; X86-NEXT: cmovnel %ecx, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: bsrl %eax, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: bsrl %esi, %ecx
; X86-NEXT: xorl $31, %ecx
; X86-NEXT: orl $32, %ecx
-; X86-NEXT: testl %eax, %eax
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovnel %eax, %ecx
+; X86-NEXT: orl $64, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edi, %esi
; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: bsrl %ebx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: bsrl %eax, %esi
; X86-NEXT: xorl $31, %esi
-; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: bsrl %ebx, %edx
; X86-NEXT: xorl $31, %edx
; X86-NEXT: orl $32, %edx
-; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: testl %eax, %eax
; X86-NEXT: cmovnel %esi, %edx
-; X86-NEXT: orl $64, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: orl %eax, %esi
-; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: bsrl %eax, %edi
+; X86-NEXT: xorl $31, %edi
+; X86-NEXT: bsrl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: orl $32, %esi
+; X86-NEXT: testl %eax, %eax
+; X86-NEXT: cmovnel %edi, %esi
+; X86-NEXT: orl $64, %esi
+; X86-NEXT: movl %ebx, %edi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: cmovnel %edx, %esi
; X86-NEXT: xorl %ebx, %ebx
-; X86-NEXT: subl %edx, %edi
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: sbbl %edx, %edx
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: subl %esi, %ecx
; X86-NEXT: movl $0, %eax
; X86-NEXT: sbbl %eax, %eax
-; X86-NEXT: movl $127, %ecx
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpl %edi, %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: sbbl %edx, %ecx
-; X86-NEXT: movl $0, %ecx
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: sbbl %esi, %esi
+; X86-NEXT: movl $0, %edi
+; X86-NEXT: sbbl %edi, %edi
+; X86-NEXT: movl $127, %edx
+; X86-NEXT: cmpl %ecx, %edx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %eax, %edx
+; X86-NEXT: movl $0, %edx
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %ecx
-; X86-NEXT: movl $0, %ecx
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %eax, %ecx
-; X86-NEXT: setb %cl
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %cl # 1-byte Folded Reload
+; X86-NEXT: sbbl %esi, %edx
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %edi, %edx
+; X86-NEXT: setb %dl
+; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: cmovnel %ebx, %edx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: cmovnel %ebx, %esi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: cmovnel %ebx, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: cmovnel %ebx, %eax
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: cmovnel %ebx, %edi
; X86-NEXT: cmovel {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
; X86-NEXT: jne .LBB4_1
; X86-NEXT: # %bb.8: # %_udiv-special-cases
-; X86-NEXT: movl %edx, %edi
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: xorl $127, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: movl %edi, %ecx
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, %edx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl %edi, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: je .LBB4_9
; X86-NEXT: # %bb.5: # %udiv-bb1
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl $1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: xorps %xmm0, %xmm0
@@ -299,8 +307,6 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ebx, %ecx
; X86-NEXT: xorb $127, %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
@@ -310,251 +316,245 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl 152(%esp,%eax), %esi
; X86-NEXT: movl 156(%esp,%eax), %edx
; X86-NEXT: shldl %cl, %esi, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 144(%esp,%eax), %edx
-; X86-NEXT: movl 148(%esp,%eax), %eax
-; X86-NEXT: shldl %cl, %eax, %esi
-; X86-NEXT: shldl %cl, %edx, %eax
-; X86-NEXT: shll %cl, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl $1, %ebx
+; X86-NEXT: movl 144(%esp,%eax), %ebx
+; X86-NEXT: movl 148(%esp,%eax), %edi
+; X86-NEXT: shldl %cl, %edi, %esi
+; X86-NEXT: shldl %cl, %ebx, %edi
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: shll %cl, %ebx
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl $0, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: jae .LBB4_2
-; X86-NEXT: # %bb.6:
-; X86-NEXT: xorl %ecx, %ecx
-; X86-NEXT: xorl %edx, %edx
-; X86-NEXT: movl %esi, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: jmp .LBB4_7
-; X86-NEXT: .LBB4_1:
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: jmp .LBB4_9
-; X86-NEXT: .LBB4_2: # %udiv-preheader
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: je .LBB4_6
+; X86-NEXT: # %bb.2: # %udiv-preheader
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl 108(%esp,%eax), %edx
+; X86-NEXT: movl 108(%esp,%eax), %ebx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 104(%esp,%eax), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shrdl %cl, %ebx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 96(%esp,%eax), %edi
+; X86-NEXT: movl 100(%esp,%eax), %eax
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 104(%esp,%eax), %ebx
-; X86-NEXT: movl %ebx, %esi
+; X86-NEXT: movl %eax, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: shrdl %cl, %edx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 96(%esp,%eax), %esi
-; X86-NEXT: movl 100(%esp,%eax), %eax
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %eax, %edi
-; X86-NEXT: shrdl %cl, %ebx, %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: shrl %cl, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shrl %cl, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: # kill: def $cl killed $cl killed $ecx
-; X86-NEXT: shrdl %cl, %eax, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shrdl %cl, %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: addl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: adcl $-1, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: adcl $-1, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: xorl %edx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: adcl $-1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: .p2align 4
; X86-NEXT: .LBB4_3: # %udiv-do-while
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl $1, %edi, %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %esi, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl $1, %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shldl $1, %eax, %edx
+; X86-NEXT: orl %ecx, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %eax
+; X86-NEXT: orl %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: shldl $1, %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl %ebx, %edx
-; X86-NEXT: shldl $1, %ebx, %eax
+; X86-NEXT: shldl $1, %eax, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %eax, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: shldl $1, %ebx, %edx
-; X86-NEXT: shldl $1, %ecx, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: shldl $1, %edi, %ecx
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: cmpl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $1, %ecx, %edi
-; X86-NEXT: orl %esi, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: shldl $1, %edi, %ecx
-; X86-NEXT: orl %esi, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl %edi, %edi
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl %edx, %ecx
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: sbbl %edi, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: sbbl %ebx, %ecx
; X86-NEXT: sarl $31, %ecx
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: andl $1, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %edi
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; X86-NEXT: movl %ecx, %esi
-; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: andl $1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %ebx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: andl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: subl %ecx, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %eax, %edx
-; X86-NEXT: movl %edx, %ebx
-; X86-NEXT: sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: sbbl %edi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: subl %ecx, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl $-1, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
; X86-NEXT: adcl $-1, %edi
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: adcl $-1, %esi
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %esi, %eax
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: adcl $-1, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: adcl $-1, %ebx
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %ebx, %eax
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %edi, %ecx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %edx, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: jne .LBB4_3
; X86-NEXT: # %bb.4:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ecx, %esi
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: jmp .LBB4_7
+; X86-NEXT: .LBB4_1:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: jmp .LBB4_9
+; X86-NEXT: .LBB4_6:
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: .LBB4_7: # %udiv-loop-exit
-; X86-NEXT: shldl $1, %ebx, %esi
-; X86-NEXT: orl %edx, %esi
-; X86-NEXT: shldl $1, %eax, %ebx
-; X86-NEXT: orl %edx, %ebx
-; X86-NEXT: shldl $1, %edi, %eax
-; X86-NEXT: orl %edx, %eax
-; X86-NEXT: movl %edi, %edx
-; X86-NEXT: addl %edi, %edx
+; X86-NEXT: shldl $1, %esi, %edx
; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: shldl $1, %edi, %esi
+; X86-NEXT: orl %ecx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl $1, %ebx, %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: addl %ebx, %ebx
+; X86-NEXT: orl %eax, %ebx
; X86-NEXT: .LBB4_9: # %udiv-end
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: xorl %ecx, %esi
-; X86-NEXT: xorl %ecx, %ebx
-; X86-NEXT: xorl %ecx, %eax
-; X86-NEXT: xorl %ecx, %edx
-; X86-NEXT: subl %ecx, %edx
-; X86-NEXT: sbbl %ecx, %eax
-; X86-NEXT: sbbl %ecx, %ebx
-; X86-NEXT: sbbl %ecx, %esi
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 56(%ebp), %ecx
-; X86-NEXT: movl %edx, (%ecx)
-; X86-NEXT: movl %eax, 4(%ecx)
-; X86-NEXT: movl %ebx, 8(%ecx)
-; X86-NEXT: movl %esi, 12(%ecx)
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 40(%ebp), %ecx
-; X86-NEXT: movl %ebx, %edi
-; X86-NEXT: movl %edx, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: xorl %eax, %edx
+; X86-NEXT: xorl %eax, %esi
+; X86-NEXT: xorl %eax, %edi
+; X86-NEXT: xorl %eax, %ebx
+; X86-NEXT: subl %eax, %ebx
+; X86-NEXT: sbbl %eax, %edi
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %eax, %esi
+; X86-NEXT: sbbl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: mull %ecx
+; X86-NEXT: movl 56(%ebp), %eax
+; X86-NEXT: movl %ebx, (%eax)
+; X86-NEXT: movl %edi, 4(%eax)
+; X86-NEXT: movl %esi, 8(%eax)
+; X86-NEXT: movl %edx, 12(%eax)
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: movl 40(%ebp), %edi
+; X86-NEXT: mull %edi
; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull %ecx
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull 40(%ebp)
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %edx, %ecx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: movl 44(%ebp), %esi
-; X86-NEXT: mull %esi
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: movl 44(%ebp), %edi
+; X86-NEXT: mull %edi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: adcl %ebx, %edx
; X86-NEXT: movl %edx, %ecx
-; X86-NEXT: setb %bl
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: mull %esi
+; X86-NEXT: movl %edi, %ebx
+; X86-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: mull %ebx
; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: movzbl %bl, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
; X86-NEXT: adcl %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: movl 40(%ebp), %eax
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: mull %edi
+; X86-NEXT: imull %eax, %ecx
+; X86-NEXT: mull %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: imull %esi, %edi
+; X86-NEXT: imull %ebx, %esi
+; X86-NEXT: addl %edx, %esi
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: movl 48(%ebp), %eax
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: imull %edi, %ebx
+; X86-NEXT: movl 52(%ebp), %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: imull %ecx, %edi
+; X86-NEXT: mull %ecx
; X86-NEXT: addl %edx, %edi
; X86-NEXT: addl %ebx, %edi
-; X86-NEXT: movl 48(%ebp), %eax
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl 52(%ebp), %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: imull %edx, %ebx
-; X86-NEXT: mull %edx
-; X86-NEXT: addl %edx, %ebx
-; X86-NEXT: addl %esi, %ebx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: adcl %edi, %ebx
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: movl 24(%ebp), %edx
-; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl 28(%ebp), %ecx
-; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: movl 32(%ebp), %edi
-; X86-NEXT: sbbl %eax, %edi
-; X86-NEXT: movl 36(%ebp), %esi
-; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: adcl %esi, %edi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: sbbl %eax, %ecx
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: sbbl %edi, %ebx
; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl %edx, (%eax)
-; X86-NEXT: movl %ecx, 4(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %ecx, 8(%eax)
+; X86-NEXT: movl %ebx, 12(%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
index 859e9244d29d2..199cae7f563b3 100644
--- a/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
+++ b/llvm/test/CodeGen/X86/div-rem-pair-recomposition-unsigned.ll
@@ -152,12 +152,11 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: pushl %esi
; X86-NEXT: andl $-16, %esp
; X86-NEXT: subl $160, %esp
-; X86-NEXT: movl 40(%ebp), %ebx
-; X86-NEXT: movl 52(%ebp), %esi
+; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl 52(%ebp), %ebx
; X86-NEXT: movl 44(%ebp), %edi
; X86-NEXT: movl %edi, %eax
-; X86-NEXT: orl %esi, %eax
-; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: orl %ebx, %eax
; X86-NEXT: orl 48(%ebp), %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: sete %cl
@@ -169,161 +168,157 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: sete %al
; X86-NEXT: orb %cl, %al
; X86-NEXT: movb %al, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; X86-NEXT: bsrl %esi, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: bsrl 48(%ebp), %ecx
+; X86-NEXT: bsrl %ebx, %ecx
; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: orl $32, %ecx
-; X86-NEXT: testl %esi, %esi
-; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: bsrl %edi, %edx
+; X86-NEXT: bsrl 48(%ebp), %edx
; X86-NEXT: xorl $31, %edx
-; X86-NEXT: bsrl %ebx, %eax
+; X86-NEXT: orl $32, %edx
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovnel %ecx, %edx
+; X86-NEXT: bsrl %edi, %esi
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: bsrl 40(%ebp), %eax
; X86-NEXT: xorl $31, %eax
; X86-NEXT: orl $32, %eax
; X86-NEXT: testl %edi, %edi
-; X86-NEXT: cmovnel %edx, %eax
+; X86-NEXT: cmovnel %esi, %eax
; X86-NEXT: orl $64, %eax
-; X86-NEXT: movl 48(%ebp), %edx
-; X86-NEXT: orl %esi, %edx
-; X86-NEXT: cmovnel %ecx, %eax
-; X86-NEXT: movl 36(%ebp), %ebx
-; X86-NEXT: bsrl %ebx, %edx
-; X86-NEXT: xorl $31, %edx
-; X86-NEXT: movl 32(%ebp), %ecx
-; X86-NEXT: bsrl %ecx, %ecx
-; X86-NEXT: xorl $31, %ecx
-; X86-NEXT: orl $32, %ecx
-; X86-NEXT: testl %ebx, %ebx
-; X86-NEXT: cmovnel %edx, %ecx
-; X86-NEXT: movl 28(%ebp), %edi
+; X86-NEXT: movl 48(%ebp), %esi
+; X86-NEXT: orl %ebx, %esi
+; X86-NEXT: cmovnel %edx, %eax
+; X86-NEXT: movl 36(%ebp), %edi
; X86-NEXT: bsrl %edi, %esi
; X86-NEXT: xorl $31, %esi
-; X86-NEXT: bsrl 24(%ebp), %edx
+; X86-NEXT: movl 32(%ebp), %ecx
+; X86-NEXT: bsrl %ecx, %edx
; X86-NEXT: xorl $31, %edx
; X86-NEXT: orl $32, %edx
; X86-NEXT: testl %edi, %edi
; X86-NEXT: cmovnel %esi, %edx
-; X86-NEXT: orl $64, %edx
-; X86-NEXT: movl 32(%ebp), %esi
-; X86-NEXT: orl %ebx, %esi
-; X86-NEXT: cmovnel %ecx, %edx
-; X86-NEXT: xorl %edi, %edi
-; X86-NEXT: subl %edx, %eax
+; X86-NEXT: movl 28(%ebp), %ebx
+; X86-NEXT: bsrl %ebx, %edi
+; X86-NEXT: xorl $31, %edi
+; X86-NEXT: bsrl 24(%ebp), %esi
+; X86-NEXT: xorl $31, %esi
+; X86-NEXT: orl $32, %esi
+; X86-NEXT: testl %ebx, %ebx
+; X86-NEXT: cmovnel %edi, %esi
+; X86-NEXT: orl $64, %esi
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: orl 36(%ebp), %edi
+; X86-NEXT: movl %ecx, %edi
+; X86-NEXT: cmovnel %edx, %esi
+; X86-NEXT: subl %esi, %eax
+; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %edx, %edx
; X86-NEXT: movl $0, %ebx
; X86-NEXT: sbbl %ebx, %ebx
; X86-NEXT: movl $0, %ecx
; X86-NEXT: sbbl %ecx, %ecx
-; X86-NEXT: movl $0, %esi
-; X86-NEXT: sbbl %esi, %esi
-; X86-NEXT: movl $127, %edx
+; X86-NEXT: movl $127, %esi
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: cmpl %eax, %edx
-; X86-NEXT: movl $0, %edx
+; X86-NEXT: cmpl %eax, %esi
+; X86-NEXT: movl $0, %esi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: sbbl %edx, %esi
+; X86-NEXT: movl $0, %esi
; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %ebx, %edx
-; X86-NEXT: movl $0, %edx
+; X86-NEXT: sbbl %ebx, %esi
+; X86-NEXT: movl $0, %esi
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %ecx, %edx
-; X86-NEXT: movl $0, %edx
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %esi, %edx
-; X86-NEXT: setb %dl
-; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %dl # 1-byte Folded Reload
-; X86-NEXT: movl 36(%ebp), %eax
-; X86-NEXT: cmovnel %edi, %eax
-; X86-NEXT: movl 32(%ebp), %esi
-; X86-NEXT: cmovnel %edi, %esi
-; X86-NEXT: movl 28(%ebp), %edx
-; X86-NEXT: cmovnel %edi, %edx
-; X86-NEXT: movl 24(%ebp), %ebx
-; X86-NEXT: cmovnel %edi, %ebx
-; X86-NEXT: movl 56(%ebp), %edi
-; X86-NEXT: jne .LBB4_8
-; X86-NEXT: # %bb.1: # %_udiv-special-cases
-; X86-NEXT: movl %eax, %edi
+; X86-NEXT: sbbl %ecx, %esi
+; X86-NEXT: setb %ah
+; X86-NEXT: orb {{[-0-9]+}}(%e{{[sb]}}p), %ah # 1-byte Folded Reload
+; X86-NEXT: movl 36(%ebp), %ebx
+; X86-NEXT: movl $0, %ecx
+; X86-NEXT: cmovnel %ecx, %ebx
+; X86-NEXT: cmovnel %ecx, %edi
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: cmovnel %ecx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: cmovnel %ecx, %esi
+; X86-NEXT: jne .LBB4_1
+; X86-NEXT: # %bb.8: # %_udiv-special-cases
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: xorl $127, %eax
; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: je .LBB4_9
+; X86-NEXT: # %bb.5: # %udiv-bb1
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: movl %edi, %eax
-; X86-NEXT: movl 56(%ebp), %edi
-; X86-NEXT: movl 24(%ebp), %ecx
-; X86-NEXT: je .LBB4_8
-; X86-NEXT: # %bb.2: # %udiv-bb1
-; X86-NEXT: movl %ecx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: addl $1, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl 24(%ebp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: xorps %xmm0, %xmm0
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 28(%ebp), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 32(%ebp), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 36(%ebp), %eax
-; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl 28(%ebp), %edx
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 32(%ebp), %edx
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
; X86-NEXT: xorb $127, %cl
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: negb %al
; X86-NEXT: movsbl %al, %eax
-; X86-NEXT: movl 136(%esp,%eax), %esi
-; X86-NEXT: movl 140(%esp,%eax), %edx
-; X86-NEXT: shldl %cl, %esi, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 136(%esp,%eax), %edi
+; X86-NEXT: movl 140(%esp,%eax), %ebx
+; X86-NEXT: shldl %cl, %edi, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 128(%esp,%eax), %ebx
-; X86-NEXT: movl 132(%esp,%eax), %edx
-; X86-NEXT: shldl %cl, %edx, %esi
-; X86-NEXT: shldl %cl, %ebx, %edx
+; X86-NEXT: movl 132(%esp,%eax), %eax
+; X86-NEXT: shldl %cl, %eax, %edi
+; X86-NEXT: shldl %cl, %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: # kill: def $cl killed $cl killed $ecx
; X86-NEXT: shll %cl, %ebx
-; X86-NEXT: addl $1, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: adcl $0, %eax
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: adcl $0, %ecx
-; X86-NEXT: adcl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: jae .LBB4_3
-; X86-NEXT: # %bb.6:
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: jmp .LBB4_7
-; X86-NEXT: .LBB4_3: # %udiv-preheader
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: je .LBB4_6
+; X86-NEXT: # %bb.2: # %udiv-preheader
; X86-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 24(%ebp), %edi
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 28(%ebp), %edi
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 32(%ebp), %edi
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl 36(%ebp), %edi
-; X86-NEXT: movl %edi, {{[0-9]+}}(%esp)
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: movl 24(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl 28(%ebp), %eax
+; X86-NEXT: movl %eax, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %edx, {{[0-9]+}}(%esp)
+; X86-NEXT: movl %esi, {{[0-9]+}}(%esp)
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: movl %ebx, %eax
; X86-NEXT: shrb $3, %al
; X86-NEXT: andb $12, %al
; X86-NEXT: movzbl %al, %eax
-; X86-NEXT: movl 92(%esp,%eax), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl 92(%esp,%eax), %esi
+; X86-NEXT: movl 88(%esp,%eax), %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %ebx, %ecx
+; X86-NEXT: shrdl %cl, %esi, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl 88(%esp,%eax), %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shrdl %cl, %edx, %edi
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 80(%esp,%eax), %edx
; X86-NEXT: movl 84(%esp,%eax), %eax
-; X86-NEXT: movl %eax, %ebx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: shrdl %cl, %edi, %ebx
-; X86-NEXT: shrl %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %eax, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: shrdl %cl, %eax, %edi
+; X86-NEXT: shrl %cl, %esi
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: shrdl %cl, %eax, %edx
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl 40(%ebp), %eax
@@ -338,41 +333,46 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movl 52(%ebp), %eax
; X86-NEXT: adcl $-1, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: xorl %ecx, %ecx
; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB4_4: # %udiv-do-while
+; X86-NEXT: .LBB4_3: # %udiv-do-while
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: shldl $1, %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: shldl $1, %ebx, %edx
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: shldl $1, %edi, %ebx
-; X86-NEXT: shldl $1, %ecx, %edi
-; X86-NEXT: shldl $1, %esi, %ecx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: orl %eax, %ecx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: shldl $1, %ecx, %esi
-; X86-NEXT: orl %eax, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl $1, %ebx, %eax
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: shldl $1, %edi, %ebx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: shldl $1, %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: shldl $1, %esi, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %esi
+; X86-NEXT: orl %ecx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: shldl $1, %esi, %ecx
-; X86-NEXT: orl %eax, %ecx
-; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: addl %esi, %esi
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: shldl $1, %esi, %edx
+; X86-NEXT: orl %ecx, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: shldl $1, %edx, %esi
+; X86-NEXT: orl %ecx, %esi
; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %edx, %edx
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: cmpl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl %ebx, %ecx
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl %edx, %ecx
+; X86-NEXT: sbbl %ebx, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: sbbl %eax, %ecx
; X86-NEXT: sarl $31, %ecx
; X86-NEXT: movl %ecx, %eax
; X86-NEXT: andl $1, %eax
@@ -386,94 +386,100 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: andl 40(%ebp), %ecx
; X86-NEXT: subl %ecx, %edi
; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: sbbl %edx, %ebx
-; X86-NEXT: sbbl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; X86-NEXT: sbbl %esi, %eax
-; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: sbbl %edx, %edi
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: sbbl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
; X86-NEXT: addl $-1, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: adcl $-1, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: adcl $-1, %edx
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; X86-NEXT: adcl $-1, %edi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: adcl $-1, %esi
+; X86-NEXT: adcl $-1, %ebx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %edi, %eax
+; X86-NEXT: orl %ebx, %eax
; X86-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: orl %edx, %ecx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: orl %esi, %ecx
; X86-NEXT: orl %eax, %ecx
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: jne .LBB4_4
-; X86-NEXT: # %bb.5:
+; X86-NEXT: jne .LBB4_3
+; X86-NEXT: # %bb.4:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; X86-NEXT: jmp .LBB4_7
+; X86-NEXT: .LBB4_1:
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; X86-NEXT: jmp .LBB4_9
+; X86-NEXT: .LBB4_6:
+; X86-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: xorl %ecx, %ecx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: movl 56(%ebp), %edi
; X86-NEXT: .LBB4_7: # %udiv-loop-exit
-; X86-NEXT: shldl $1, %esi, %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; X86-NEXT: orl %ecx, %eax
-; X86-NEXT: shldl $1, %edx, %esi
-; X86-NEXT: orl %ecx, %esi
-; X86-NEXT: shldl $1, %ebx, %edx
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; X86-NEXT: shldl $1, %edi, %ebx
+; X86-NEXT: orl %ecx, %ebx
+; X86-NEXT: shldl $1, %edx, %edi
+; X86-NEXT: orl %ecx, %edi
+; X86-NEXT: shldl $1, %esi, %edx
; X86-NEXT: orl %ecx, %edx
-; X86-NEXT: addl %ebx, %ebx
-; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: .LBB4_8: # %udiv-end
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: addl %esi, %esi
+; X86-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; X86-NEXT: .LBB4_9: # %udiv-end
; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl %ebx, (%edi)
-; X86-NEXT: movl %edx, 4(%edi)
-; X86-NEXT: movl %esi, 8(%edi)
-; X86-NEXT: movl %eax, 12(%edi)
-; X86-NEXT: movl %eax, %ecx
+; X86-NEXT: movl 56(%ebp), %eax
+; X86-NEXT: movl %esi, (%eax)
+; X86-NEXT: movl %edx, 4(%eax)
+; X86-NEXT: movl %edi, 8(%eax)
+; X86-NEXT: movl %ebx, 12(%eax)
; X86-NEXT: movl 48(%ebp), %eax
+; X86-NEXT: movl %esi, %ecx
+; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, %esi
; X86-NEXT: imull %edx, %esi
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: mull %ecx
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: addl %esi, %edx
+; X86-NEXT: movl %edi, %esi
; X86-NEXT: movl 52(%ebp), %edi
-; X86-NEXT: imull %ebx, %edi
+; X86-NEXT: imull %ecx, %edi
; X86-NEXT: addl %edx, %edi
-; X86-NEXT: movl 40(%ebp), %eax
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; X86-NEXT: mull %ebx
-; X86-NEXT: movl %eax, %esi
-; X86-NEXT: imull 40(%ebp), %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: movl 44(%ebp), %eax
-; X86-NEXT: imull %eax, %ebx
-; X86-NEXT: addl %ecx, %ebx
-; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %edi, %ebx
-; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; X86-NEXT: movl %esi, %eax
; X86-NEXT: movl 40(%ebp), %ecx
+; X86-NEXT: movl %ecx, %eax
+; X86-NEXT: mull %esi
+; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: imull %ecx, %ebx
+; X86-NEXT: addl %edx, %ebx
+; X86-NEXT: movl %ebx, %eax
+; X86-NEXT: movl 44(%ebp), %ebx
+; X86-NEXT: imull %ebx, %esi
+; X86-NEXT: addl %eax, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; X86-NEXT: addl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; X86-NEXT: adcl %edi, %esi
+; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; X86-NEXT: movl %edi, %eax
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %edi
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: mull %ecx
-; X86-NEXT: movl %edx, %ebx
; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: adcl $0, %ebx
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: mull 44(%ebp)
-; X86-NEXT: movl 28(%ebp), %esi
+; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: adcl $0, %edx
+; X86-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; X86-NEXT: movl %edi, %eax
+; X86-NEXT: mull %ebx
+; X86-NEXT: movl 32(%ebp), %ebx
; X86-NEXT: movl %edx, %edi
; X86-NEXT: addl %ecx, %eax
; X86-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; X86-NEXT: adcl %ebx, %edi
+; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
; X86-NEXT: setb %cl
; X86-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
; X86-NEXT: mull 44(%ebp)
@@ -481,19 +487,19 @@ define i128 @scalar_i128(i128 %x, i128 %y, ptr %divdst) nounwind {
; X86-NEXT: movzbl %cl, %ecx
; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
-; X86-NEXT: movl 24(%ebp), %ebx
-; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; X86-NEXT: movl 32(%ebp), %edi
-; X86-NEXT: sbbl %eax, %edi
-; X86-NEXT: movl 36(%ebp), %ecx
-; X86-NEXT: sbbl %edx, %ecx
+; X86-NEXT: adcl %esi, %edx
+; X86-NEXT: movl 24(%ebp), %edi
+; X86-NEXT: subl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
+; X86-NEXT: movl 28(%ebp), %ecx
+; X86-NEXT: sbbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; X86-NEXT: sbbl %eax, %ebx
+; X86-NEXT: movl 36(%ebp), %esi
+; X86-NEXT: sbbl %edx, %esi
; X86-NEXT: movl 8(%ebp), %eax
-; X86-NEXT: movl %ebx, (%eax)
-; X86-NEXT: movl %esi, 4(%eax)
-; X86-NEXT: movl %edi, 8(%eax)
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: movl %edi, (%eax)
+; X86-NEXT: movl %ecx, 4(%eax)
+; X86-NEXT: movl %ebx, 8(%eax)
+; X86-NEXT: movl %esi, 12(%eax)
; X86-NEXT: leal -12(%ebp), %esp
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 4e31b48ec5cec..1b307b30d8c0d 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -2065,10 +2065,11 @@ define i64 @PR51612(i64 %x, i64 %y) {
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %edx
-; ATHLON-NEXT: incl %edx
-; ATHLON-NEXT: addl $1, %eax
-; ATHLON-NEXT: adcl $0, %ecx
-; ATHLON-NEXT: cmovbl %edx, %eax
+; ATHLON-NEXT: addl $1, %ecx
+; ATHLON-NEXT: adcl $0, %edx
+; ATHLON-NEXT: incl %eax
+; ATHLON-NEXT: orl %ecx, %edx
+; ATHLON-NEXT: cmovnel %ecx, %eax
; ATHLON-NEXT: andl 10, %eax
; ATHLON-NEXT: xorl %edx, %edx
; ATHLON-NEXT: retl
@@ -2077,7 +2078,8 @@ define i64 @PR51612(i64 %x, i64 %y) {
; MCU: # %bb.0:
; MCU-NEXT: addl $1, %eax
; MCU-NEXT: adcl $0, %edx
-; MCU-NEXT: jae .LBB45_2
+; MCU-NEXT: orl %eax, %edx
+; MCU-NEXT: jne .LBB45_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax
; MCU-NEXT: incl %eax
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
index f72679f55e114..f114d9a2fd192 100644
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/overflow-intrinsics.ll
@@ -15,6 +15,16 @@ define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo1_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG14:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG14]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG14]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META9:![0-9]+]], !DIExpression(), [[DBG14]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META11:![0-9]+]], !DIExpression(), [[META15:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG16:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META13:![0-9]+]], !DIExpression(), [[DBG16]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG17:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
@@ -28,8 +38,19 @@ define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo1_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG23:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG23]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG23]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META20:![0-9]+]], !DIExpression(), [[DBG23]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META21:![0-9]+]], !DIExpression(), [[META24:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG25:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META22:![0-9]+]], !DIExpression(), [[DBG25]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8, !dbg [[DBG26:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG27:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
@@ -45,6 +66,16 @@ define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo2_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG33:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG33]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG33]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META30:![0-9]+]], !DIExpression(), [[DBG33]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META31:![0-9]+]], !DIExpression(), [[META34:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG35:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META32:![0-9]+]], !DIExpression(), [[DBG35]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG36:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
@@ -58,8 +89,19 @@ define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo2_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG42:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG42]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG42]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META39:![0-9]+]], !DIExpression(), [[DBG42]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META40:![0-9]+]], !DIExpression(), [[META43:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG44:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META41:![0-9]+]], !DIExpression(), [[DBG44]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8, !dbg [[DBG45:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG46:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
@@ -75,6 +117,16 @@ define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo3_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG52:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG52]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG52]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META49:![0-9]+]], !DIExpression(), [[DBG52]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META50:![0-9]+]], !DIExpression(), [[META53:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG54:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META51:![0-9]+]], !DIExpression(), [[DBG54]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG55:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -88,8 +140,19 @@ define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo3_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]]), !dbg [[DBG61:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG61]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG61]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META58:![0-9]+]], !DIExpression(), [[DBG61]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META59:![0-9]+]], !DIExpression(), [[META62:![0-9]+]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG63:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META60:![0-9]+]], !DIExpression(), [[DBG63]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[RES:%.*]], align 8, !dbg [[DBG64:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG65:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -106,6 +169,15 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo6_xor(
+; DEBUG-NEXT: #dbg_value(i64 poison, [[META68:![0-9]+]], !DIExpression(), [[META71:![0-9]+]])
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]), !dbg [[DBG72:![0-9]+]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG72]]
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META69:![0-9]+]], !DIExpression(), [[DBG72]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG73:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META70:![0-9]+]], !DIExpression(), [[DBG73]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG74:![0-9]+]]
;
%x = xor i64 %a, -1
%cmp = icmp ult i64 %x, %b
@@ -119,6 +191,15 @@ define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) {
; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo6_xor_commuted(
+; DEBUG-NEXT: #dbg_value(i64 poison, [[META77:![0-9]+]], !DIExpression(), [[META80:![0-9]+]])
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[A:%.*]], i64 [[B:%.*]]), !dbg [[DBG81:![0-9]+]]
+; DEBUG-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG81]]
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META78:![0-9]+]], !DIExpression(), [[DBG81]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42, !dbg [[DBG82:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META79:![0-9]+]], !DIExpression(), [[DBG82]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG83:![0-9]+]]
;
%x = xor i64 %a, -1
%cmp = icmp ugt i64 %b, %x
@@ -135,6 +216,16 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
; CHECK-NEXT: call void @use(i64 [[X]])
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo6_xor_multi_use(
+; DEBUG-NEXT: [[X:%.*]] = xor i64 -1, [[A:%.*]], !dbg [[DBG89:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[X]], [[META86:![0-9]+]], !DIExpression(), [[DBG89]])
+; DEBUG-NEXT: [[CMP:%.*]] = icmp ult i64 [[X]], [[B:%.*]], !dbg [[DBG90:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META87:![0-9]+]], !DIExpression(), [[DBG90]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42, !dbg [[DBG91:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META88:![0-9]+]], !DIExpression(), [[DBG91]])
+; DEBUG-NEXT: call void @use(i64 [[X]]), !dbg [[DBG92:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG93:![0-9]+]]
;
%x = xor i64 -1, %a
%cmp = icmp ult i64 %x, %b
@@ -145,9 +236,18 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) {
define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
; CHECK-LABEL: @usubo_ult_i64_overflow_used(
-; CHECK-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
; CHECK-NEXT: ret i1 [[OV]]
+;
+; DEBUG-LABEL: @usubo_ult_i64_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]]), !dbg [[DBG98:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG98]]
+; DEBUG-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG98]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META96:![0-9]+]], !DIExpression(), [[DBG98]])
+; DEBUG-NEXT: #dbg_value(i1 [[OV1]], [[META97:![0-9]+]], !DIExpression(), [[META99:![0-9]+]])
+; DEBUG-NEXT: ret i1 [[OV1]], !dbg [[DBG100:![0-9]+]]
;
%s = sub i64 %x, %y
%ov = icmp ult i64 %x, %y
@@ -156,10 +256,20 @@ define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
; CHECK-LABEL: @usubo_ult_i64_math_overflow_used(
-; CHECK-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: store i64 [[S]], ptr [[P:%.*]]
-; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]])
+; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
+; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: store i64 [[MATH]], ptr [[P:%.*]], align 8
; CHECK-NEXT: ret i1 [[OV]]
+;
+; DEBUG-LABEL: @usubo_ult_i64_math_overflow_used(
+; DEBUG-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[X:%.*]], i64 [[Y:%.*]]), !dbg [[DBG105:![0-9]+]]
+; DEBUG-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0, !dbg [[DBG105]]
+; DEBUG-NEXT: [[OV1:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1, !dbg [[DBG105]]
+; DEBUG-NEXT: #dbg_value(i64 [[MATH]], [[META103:![0-9]+]], !DIExpression(), [[DBG105]])
+; DEBUG-NEXT: store i64 [[MATH]], ptr [[P:%.*]], align 8, !dbg [[DBG106:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[OV1]], [[META104:![0-9]+]], !DIExpression(), [[META107:![0-9]+]])
+; DEBUG-NEXT: ret i1 [[OV1]], !dbg [[DBG108:![0-9]+]]
;
%s = sub i64 %x, %y
store i64 %s, ptr %p
diff --git a/llvm/test/Transforms/CodeGenPrepare/SPARC/overflow-intrinsics.ll b/llvm/test/Transforms/CodeGenPrepare/SPARC/overflow-intrinsics.ll
index ec60238cbf927..d0a3ca4daa02f 100644
--- a/llvm/test/Transforms/CodeGenPrepare/SPARC/overflow-intrinsics.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/SPARC/overflow-intrinsics.ll
@@ -14,6 +14,15 @@ define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[ADD]], [[A]]
; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo1_overflow_used(
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG14:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META9:![0-9]+]], !DIExpression(), [[DBG14]])
+; DEBUG-NEXT: [[CMP:%.*]] = icmp ult i64 [[ADD]], [[A]], !dbg [[DBG15:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META11:![0-9]+]], !DIExpression(), [[DBG15]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42, !dbg [[DBG16:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META13:![0-9]+]], !DIExpression(), [[DBG16]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG17:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
@@ -23,12 +32,21 @@ define i64 @uaddo1_overflow_used(i64 %a, i64 %b) nounwind ssp {
define i64 @uaddo1_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-LABEL: @uaddo1_math_overflow_used(
-; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
-; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[ADD]], [[A]]
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[ADD]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo1_math_overflow_used(
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG23:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META20:![0-9]+]], !DIExpression(), [[DBG23]])
+; DEBUG-NEXT: [[CMP:%.*]] = icmp ult i64 [[ADD]], [[A]], !dbg [[DBG24:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META21:![0-9]+]], !DIExpression(), [[DBG24]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42, !dbg [[DBG25:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META22:![0-9]+]], !DIExpression(), [[DBG25]])
+; DEBUG-NEXT: store i64 [[ADD]], ptr [[RES:%.*]], align 8, !dbg [[DBG26:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG27:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %a
@@ -43,6 +61,15 @@ define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[ADD]], [[B]]
; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo2_overflow_used(
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG33:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META30:![0-9]+]], !DIExpression(), [[DBG33]])
+; DEBUG-NEXT: [[CMP:%.*]] = icmp ult i64 [[ADD]], [[B]], !dbg [[DBG34:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META31:![0-9]+]], !DIExpression(), [[DBG34]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42, !dbg [[DBG35:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META32:![0-9]+]], !DIExpression(), [[DBG35]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG36:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
@@ -52,12 +79,21 @@ define i64 @uaddo2_overflow_used(i64 %a, i64 %b) nounwind ssp {
define i64 @uaddo2_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-LABEL: @uaddo2_math_overflow_used(
-; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
-; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[ADD]], [[B]]
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[ADD]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo2_math_overflow_used(
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG42:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META39:![0-9]+]], !DIExpression(), [[DBG42]])
+; DEBUG-NEXT: [[CMP:%.*]] = icmp ult i64 [[ADD]], [[B]], !dbg [[DBG43:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META40:![0-9]+]], !DIExpression(), [[DBG43]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42, !dbg [[DBG44:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META41:![0-9]+]], !DIExpression(), [[DBG44]])
+; DEBUG-NEXT: store i64 [[ADD]], ptr [[RES:%.*]], align 8, !dbg [[DBG45:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG46:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ult i64 %add, %b
@@ -72,6 +108,15 @@ define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B]], [[ADD]]
; CHECK-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo3_overflow_used(
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG52:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META49:![0-9]+]], !DIExpression(), [[DBG52]])
+; DEBUG-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B]], [[ADD]], !dbg [[DBG53:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META50:![0-9]+]], !DIExpression(), [[DBG53]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42, !dbg [[DBG54:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META51:![0-9]+]], !DIExpression(), [[DBG54]])
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG55:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -81,12 +126,21 @@ define i64 @uaddo3_overflow_used(i64 %a, i64 %b) nounwind ssp {
define i64 @uaddo3_math_overflow_used(i64 %a, i64 %b, ptr %res) nounwind ssp {
; CHECK-LABEL: @uaddo3_math_overflow_used(
-; CHECK-NEXT: [[TMP1:%.*]] = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 [[B:%.*]], i64 [[A:%.*]])
-; CHECK-NEXT: [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP1]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP1]], 1
+; CHECK-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]]
+; CHECK-NEXT: [[OV:%.*]] = icmp ugt i64 [[B]], [[ADD]]
; CHECK-NEXT: [[Q:%.*]] = select i1 [[OV]], i64 [[B]], i64 42
-; CHECK-NEXT: store i64 [[MATH]], ptr [[RES:%.*]]
+; CHECK-NEXT: store i64 [[ADD]], ptr [[RES:%.*]], align 8
; CHECK-NEXT: ret i64 [[Q]]
+;
+; DEBUG-LABEL: @uaddo3_math_overflow_used(
+; DEBUG-NEXT: [[ADD:%.*]] = add i64 [[B:%.*]], [[A:%.*]], !dbg [[DBG61:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[ADD]], [[META58:![0-9]+]], !DIExpression(), [[DBG61]])
+; DEBUG-NEXT: [[CMP:%.*]] = icmp ugt i64 [[B]], [[ADD]], !dbg [[DBG62:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[CMP]], [[META59:![0-9]+]], !DIExpression(), [[DBG62]])
+; DEBUG-NEXT: [[Q:%.*]] = select i1 [[CMP]], i64 [[B]], i64 42, !dbg [[DBG63:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[Q]], [[META60:![0-9]+]], !DIExpression(), [[DBG63]])
+; DEBUG-NEXT: store i64 [[ADD]], ptr [[RES:%.*]], align 8, !dbg [[DBG64:![0-9]+]]
+; DEBUG-NEXT: ret i64 [[Q]], !dbg [[DBG65:![0-9]+]]
;
%add = add i64 %b, %a
%cmp = icmp ugt i64 %b, %add
@@ -100,6 +154,13 @@ define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
; CHECK-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
; CHECK-NEXT: ret i1 [[OV]]
+;
+; DEBUG-LABEL: @usubo_ult_i64_overflow_used(
+; DEBUG-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]], !dbg [[DBG70:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[S]], [[META68:![0-9]+]], !DIExpression(), [[DBG70]])
+; DEBUG-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]], !dbg [[DBG71:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META69:![0-9]+]], !DIExpression(), [[DBG71]])
+; DEBUG-NEXT: ret i1 [[OV]], !dbg [[DBG72:![0-9]+]]
;
%s = sub i64 %x, %y
%ov = icmp ult i64 %x, %y
@@ -109,9 +170,17 @@ define i1 @usubo_ult_i64_overflow_used(i64 %x, i64 %y, ptr %p) {
define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) {
; CHECK-LABEL: @usubo_ult_i64_math_overflow_used(
; CHECK-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT: store i64 [[S]], ptr [[P:%.*]]
+; CHECK-NEXT: store i64 [[S]], ptr [[P:%.*]], align 8
; CHECK-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]]
; CHECK-NEXT: ret i1 [[OV]]
+;
+; DEBUG-LABEL: @usubo_ult_i64_math_overflow_used(
+; DEBUG-NEXT: [[S:%.*]] = sub i64 [[X:%.*]], [[Y:%.*]], !dbg [[DBG77:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i64 [[S]], [[META75:![0-9]+]], !DIExpression(), [[DBG77]])
+; DEBUG-NEXT: store i64 [[S]], ptr [[P:%.*]], align 8, !dbg [[DBG78:![0-9]+]]
+; DEBUG-NEXT: [[OV:%.*]] = icmp ult i64 [[X]], [[Y]], !dbg [[DBG79:![0-9]+]]
+; DEBUG-NEXT: #dbg_value(i1 [[OV]], [[META76:![0-9]+]], !DIExpression(), [[DBG79]])
+; DEBUG-NEXT: ret i1 [[OV]], !dbg [[DBG80:![0-9]+]]
;
%s = sub i64 %x, %y
store i64 %s, ptr %p
>From c38e6dc62a1ff6d6e4c34dfde2fd96a8b90f876c Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 24 Sep 2025 12:32:43 -0400
Subject: [PATCH 04/12] f
---
llvm/test/CodeGen/PowerPC/sat-add.ll | 62 +-
llvm/test/CodeGen/PowerPC/sat-add.s | 1260 +++++++++++++++++++++
llvm/test/CodeGen/RISCV/branch-on-zero.ll | 28 +-
llvm/test/CodeGen/X86/psubus.ll | 814 +++++++++----
llvm/test/CodeGen/X86/select.ll | 12 +-
test_direct_uaddo.ll | 9 +
test_sat_pattern.ll | 6 +
test_sat_pattern.s | 0
test_scalar_sat.ll | 6 +
test_uaddo_conversion.ll | 9 +
test_uaddo_only.ll | 6 +
test_uaddo_only.s | 22 +
test_uaddsat.ll | 9 +
test_usubo.ll | 15 +
test_vector_uaddo.ll | 9 +
test_vector_uaddo.s | 21 +
trace_uaddsat.ll | 6 +
17 files changed, 1998 insertions(+), 296 deletions(-)
create mode 100644 llvm/test/CodeGen/PowerPC/sat-add.s
create mode 100644 test_direct_uaddo.ll
create mode 100644 test_sat_pattern.ll
create mode 100644 test_sat_pattern.s
create mode 100644 test_scalar_sat.ll
create mode 100644 test_uaddo_conversion.ll
create mode 100644 test_uaddo_only.ll
create mode 100644 test_uaddo_only.s
create mode 100644 test_uaddsat.ll
create mode 100644 test_usubo.ll
create mode 100644 test_vector_uaddo.ll
create mode 100644 test_vector_uaddo.s
create mode 100644 trace_uaddsat.ll
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index fc608f9f6410b..012f03f0b884c 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -24,12 +24,11 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 4, 3, 24
+; CHECK-NEXT: clrlwi 3, 3, 24
; CHECK-NEXT: addi 3, 3, 42
-; CHECK-NEXT: clrlwi 5, 3, 24
-; CHECK-NEXT: cmplw 4, 5
+; CHECK-NEXT: andi. 4, 3, 256
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: iseleq 3, 3, 4
; CHECK-NEXT: blr
%a = add i8 %x, 42
%c = icmp ugt i8 %x, %a
@@ -70,12 +69,11 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 4, 3, 16
+; CHECK-NEXT: clrlwi 3, 3, 16
; CHECK-NEXT: addi 3, 3, 42
-; CHECK-NEXT: clrlwi 5, 3, 16
-; CHECK-NEXT: cmplw 4, 5
+; CHECK-NEXT: andis. 4, 3, 1
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: iseleq 3, 3, 4
; CHECK-NEXT: blr
%a = add i16 %x, 42
%c = icmp ugt i16 %x, %a
@@ -116,9 +114,9 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; CHECK: # %bb.0:
; CHECK-NEXT: addi 4, 3, 42
-; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: cmplw 4, 3
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: iselgt 3, 3, 4
+; CHECK-NEXT: isellt 3, 3, 4
; CHECK-NEXT: blr
%a = add i32 %x, 42
%c = icmp ugt i32 %x, %a
@@ -205,12 +203,12 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 5, 3, 24
+; CHECK-NEXT: clrlwi 4, 4, 24
+; CHECK-NEXT: clrlwi 3, 3, 24
; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: clrlwi 4, 3, 24
-; CHECK-NEXT: cmplw 5, 4
+; CHECK-NEXT: andi. 4, 3, 256
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: iseleq 3, 3, 4
; CHECK-NEXT: blr
%a = add i8 %x, %y
%c = icmp ugt i8 %x, %a
@@ -256,12 +254,12 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 5, 3, 16
+; CHECK-NEXT: clrlwi 4, 4, 16
+; CHECK-NEXT: clrlwi 3, 3, 16
; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: clrlwi 4, 3, 16
-; CHECK-NEXT: cmplw 5, 4
+; CHECK-NEXT: andis. 4, 3, 1
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: iseleq 3, 3, 4
; CHECK-NEXT: blr
%a = add i16 %x, %y
%c = icmp ugt i16 %x, %a
@@ -306,9 +304,9 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; CHECK: # %bb.0:
; CHECK-NEXT: add 4, 3, 4
-; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: cmplw 4, 3
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: iselgt 3, 3, 4
+; CHECK-NEXT: isellt 3, 3, 4
; CHECK-NEXT: blr
%a = add i32 %x, %y
%c = icmp ugt i32 %x, %a
@@ -402,7 +400,9 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI25_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI25_0 at toc@l
; CHECK-NEXT: lxvd2x 35, 0, 3
-; CHECK-NEXT: vaddubs 2, 2, 3
+; CHECK-NEXT: vaddubm 3, 2, 3
+; CHECK-NEXT: vcmpgtub 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: blr
%a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
%c = icmp ugt <16 x i8> %x, %a
@@ -448,7 +448,9 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI28_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI28_0 at toc@l
; CHECK-NEXT: lxvd2x 35, 0, 3
-; CHECK-NEXT: vadduhs 2, 2, 3
+; CHECK-NEXT: vadduhm 3, 2, 3
+; CHECK-NEXT: vcmpgtuh 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: blr
%a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
%c = icmp ugt <8 x i16> %x, %a
@@ -494,7 +496,9 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI31_0 at toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI31_0 at toc@l
; CHECK-NEXT: lxvd2x 35, 0, 3
-; CHECK-NEXT: vadduws 2, 2, 3
+; CHECK-NEXT: vadduwm 3, 2, 3
+; CHECK-NEXT: vcmpgtuw 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: blr
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
%c = icmp ugt <4 x i32> %x, %a
@@ -586,7 +590,9 @@ define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8>
define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: vaddubs 2, 2, 3
+; CHECK-NEXT: vaddubm 3, 2, 3
+; CHECK-NEXT: vcmpgtub 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: blr
%a = add <16 x i8> %x, %y
%c = icmp ugt <16 x i8> %x, %a
@@ -626,7 +632,9 @@ define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16>
define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: vadduhs 2, 2, 3
+; CHECK-NEXT: vadduhm 3, 2, 3
+; CHECK-NEXT: vcmpgtuh 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: blr
%a = add <8 x i16> %x, %y
%c = icmp ugt <8 x i16> %x, %a
@@ -666,7 +674,9 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32>
define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: vadduws 2, 2, 3
+; CHECK-NEXT: vadduwm 3, 2, 3
+; CHECK-NEXT: vcmpgtuw 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: blr
%a = add <4 x i32> %x, %y
%c = icmp ugt <4 x i32> %x, %a
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.s b/llvm/test/CodeGen/PowerPC/sat-add.s
new file mode 100644
index 0000000000000..ca085fc0f6997
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/sat-add.s
@@ -0,0 +1,1260 @@
+ .abiversion 2
+ .file "sat-add.ll"
+ .text
+ .globl unsigned_sat_constant_i8_using_min # -- Begin function unsigned_sat_constant_i8_using_min
+ .p2align 4
+ .type unsigned_sat_constant_i8_using_min, at function
+unsigned_sat_constant_i8_using_min: # @unsigned_sat_constant_i8_using_min
+.Lfunc_begin0:
+ .cfi_startproc
+# %bb.0:
+ clrlwi 4, 3, 24
+ cmplwi 4, 213
+ li 4, -43
+ isellt 3, 3, 4
+ addi 3, 3, 42
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size unsigned_sat_constant_i8_using_min, .Lfunc_end0-.Lfunc_begin0
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i8_using_cmp_sum # -- Begin function unsigned_sat_constant_i8_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_constant_i8_using_cmp_sum, at function
+unsigned_sat_constant_i8_using_cmp_sum: # @unsigned_sat_constant_i8_using_cmp_sum
+.Lfunc_begin1:
+ .cfi_startproc
+# %bb.0:
+ clrlwi 3, 3, 24
+ addi 3, 3, 42
+ andi. 4, 3, 256
+ li 4, -1
+ iseleq 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end1:
+ .size unsigned_sat_constant_i8_using_cmp_sum, .Lfunc_end1-.Lfunc_begin1
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i8_using_cmp_notval # -- Begin function unsigned_sat_constant_i8_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_constant_i8_using_cmp_notval, at function
+unsigned_sat_constant_i8_using_cmp_notval: # @unsigned_sat_constant_i8_using_cmp_notval
+.Lfunc_begin2:
+ .cfi_startproc
+# %bb.0:
+ clrlwi 4, 3, 24
+ addi 3, 3, 42
+ cmplwi 4, 213
+ li 4, -1
+ iselgt 3, 4, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end2:
+ .size unsigned_sat_constant_i8_using_cmp_notval, .Lfunc_end2-.Lfunc_begin2
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i16_using_min # -- Begin function unsigned_sat_constant_i16_using_min
+ .p2align 4
+ .type unsigned_sat_constant_i16_using_min, at function
+unsigned_sat_constant_i16_using_min: # @unsigned_sat_constant_i16_using_min
+.Lfunc_begin3:
+ .cfi_startproc
+# %bb.0:
+ clrlwi 4, 3, 16
+ cmplwi 4, 65493
+ li 4, -43
+ isellt 3, 3, 4
+ addi 3, 3, 42
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end3:
+ .size unsigned_sat_constant_i16_using_min, .Lfunc_end3-.Lfunc_begin3
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i16_using_cmp_sum # -- Begin function unsigned_sat_constant_i16_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_constant_i16_using_cmp_sum, at function
+unsigned_sat_constant_i16_using_cmp_sum: # @unsigned_sat_constant_i16_using_cmp_sum
+.Lfunc_begin4:
+ .cfi_startproc
+# %bb.0:
+ clrlwi 3, 3, 16
+ addi 3, 3, 42
+ andis. 4, 3, 1
+ li 4, -1
+ iseleq 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end4:
+ .size unsigned_sat_constant_i16_using_cmp_sum, .Lfunc_end4-.Lfunc_begin4
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i16_using_cmp_notval # -- Begin function unsigned_sat_constant_i16_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_constant_i16_using_cmp_notval, at function
+unsigned_sat_constant_i16_using_cmp_notval: # @unsigned_sat_constant_i16_using_cmp_notval
+.Lfunc_begin5:
+ .cfi_startproc
+# %bb.0:
+ clrlwi 4, 3, 16
+ addi 3, 3, 42
+ cmplwi 4, 65493
+ li 4, -1
+ iselgt 3, 4, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end5:
+ .size unsigned_sat_constant_i16_using_cmp_notval, .Lfunc_end5-.Lfunc_begin5
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i32_using_min # -- Begin function unsigned_sat_constant_i32_using_min
+ .p2align 4
+ .type unsigned_sat_constant_i32_using_min, at function
+unsigned_sat_constant_i32_using_min: # @unsigned_sat_constant_i32_using_min
+.Lfunc_begin6:
+ .cfi_startproc
+# %bb.0:
+ li 4, -43
+ cmplw 3, 4
+ isellt 3, 3, 4
+ addi 3, 3, 42
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end6:
+ .size unsigned_sat_constant_i32_using_min, .Lfunc_end6-.Lfunc_begin6
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i32_using_cmp_sum # -- Begin function unsigned_sat_constant_i32_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_constant_i32_using_cmp_sum, at function
+unsigned_sat_constant_i32_using_cmp_sum: # @unsigned_sat_constant_i32_using_cmp_sum
+.Lfunc_begin7:
+ .cfi_startproc
+# %bb.0:
+ addi 4, 3, 42
+ cmplw 4, 3
+ li 3, -1
+ isellt 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end7:
+ .size unsigned_sat_constant_i32_using_cmp_sum, .Lfunc_end7-.Lfunc_begin7
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i32_using_cmp_notval # -- Begin function unsigned_sat_constant_i32_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_constant_i32_using_cmp_notval, at function
+unsigned_sat_constant_i32_using_cmp_notval: # @unsigned_sat_constant_i32_using_cmp_notval
+.Lfunc_begin8:
+ .cfi_startproc
+# %bb.0:
+ li 5, -43
+ addi 4, 3, 42
+ cmplw 3, 5
+ li 3, -1
+ iselgt 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end8:
+ .size unsigned_sat_constant_i32_using_cmp_notval, .Lfunc_end8-.Lfunc_begin8
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i64_using_min # -- Begin function unsigned_sat_constant_i64_using_min
+ .p2align 4
+ .type unsigned_sat_constant_i64_using_min, at function
+unsigned_sat_constant_i64_using_min: # @unsigned_sat_constant_i64_using_min
+.Lfunc_begin9:
+ .cfi_startproc
+# %bb.0:
+ li 4, -43
+ cmpld 3, 4
+ isellt 3, 3, 4
+ addi 3, 3, 42
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end9:
+ .size unsigned_sat_constant_i64_using_min, .Lfunc_end9-.Lfunc_begin9
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i64_using_cmp_sum # -- Begin function unsigned_sat_constant_i64_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_constant_i64_using_cmp_sum, at function
+unsigned_sat_constant_i64_using_cmp_sum: # @unsigned_sat_constant_i64_using_cmp_sum
+.Lfunc_begin10:
+ .cfi_startproc
+# %bb.0:
+ li 4, 0
+ addic 3, 3, 42
+ addze. 4, 4
+ li 4, -1
+ iseleq 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end10:
+ .size unsigned_sat_constant_i64_using_cmp_sum, .Lfunc_end10-.Lfunc_begin10
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i64_using_cmp_notval # -- Begin function unsigned_sat_constant_i64_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_constant_i64_using_cmp_notval, at function
+unsigned_sat_constant_i64_using_cmp_notval: # @unsigned_sat_constant_i64_using_cmp_notval
+.Lfunc_begin11:
+ .cfi_startproc
+# %bb.0:
+ li 4, 0
+ addic 3, 3, 42
+ addze. 4, 4
+ li 4, -1
+ iseleq 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end11:
+ .size unsigned_sat_constant_i64_using_cmp_notval, .Lfunc_end11-.Lfunc_begin11
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i8_using_min # -- Begin function unsigned_sat_variable_i8_using_min
+ .p2align 4
+ .type unsigned_sat_variable_i8_using_min, at function
+unsigned_sat_variable_i8_using_min: # @unsigned_sat_variable_i8_using_min
+.Lfunc_begin12:
+ .cfi_startproc
+# %bb.0:
+ not 6, 4
+ clrlwi 5, 3, 24
+ clrlwi 7, 6, 24
+ cmplw 5, 7
+ isellt 3, 3, 6
+ add 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end12:
+ .size unsigned_sat_variable_i8_using_min, .Lfunc_end12-.Lfunc_begin12
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i8_using_cmp_sum # -- Begin function unsigned_sat_variable_i8_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_variable_i8_using_cmp_sum, at function
+unsigned_sat_variable_i8_using_cmp_sum: # @unsigned_sat_variable_i8_using_cmp_sum
+.Lfunc_begin13:
+ .cfi_startproc
+# %bb.0:
+ clrlwi 4, 4, 24
+ clrlwi 3, 3, 24
+ add 3, 3, 4
+ andi. 4, 3, 256
+ li 4, -1
+ iseleq 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end13:
+ .size unsigned_sat_variable_i8_using_cmp_sum, .Lfunc_end13-.Lfunc_begin13
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i8_using_cmp_notval # -- Begin function unsigned_sat_variable_i8_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_variable_i8_using_cmp_notval, at function
+unsigned_sat_variable_i8_using_cmp_notval: # @unsigned_sat_variable_i8_using_cmp_notval
+.Lfunc_begin14:
+ .cfi_startproc
+# %bb.0:
+ not 6, 4
+ clrlwi 5, 3, 24
+ add 3, 3, 4
+ li 4, -1
+ clrlwi 6, 6, 24
+ cmplw 5, 6
+ iselgt 3, 4, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end14:
+ .size unsigned_sat_variable_i8_using_cmp_notval, .Lfunc_end14-.Lfunc_begin14
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i16_using_min # -- Begin function unsigned_sat_variable_i16_using_min
+ .p2align 4
+ .type unsigned_sat_variable_i16_using_min, at function
+unsigned_sat_variable_i16_using_min: # @unsigned_sat_variable_i16_using_min
+.Lfunc_begin15:
+ .cfi_startproc
+# %bb.0:
+ not 6, 4
+ clrlwi 5, 3, 16
+ clrlwi 7, 6, 16
+ cmplw 5, 7
+ isellt 3, 3, 6
+ add 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end15:
+ .size unsigned_sat_variable_i16_using_min, .Lfunc_end15-.Lfunc_begin15
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i16_using_cmp_sum # -- Begin function unsigned_sat_variable_i16_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_variable_i16_using_cmp_sum, at function
+unsigned_sat_variable_i16_using_cmp_sum: # @unsigned_sat_variable_i16_using_cmp_sum
+.Lfunc_begin16:
+ .cfi_startproc
+# %bb.0:
+ clrlwi 4, 4, 16
+ clrlwi 3, 3, 16
+ add 3, 3, 4
+ andis. 4, 3, 1
+ li 4, -1
+ iseleq 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end16:
+ .size unsigned_sat_variable_i16_using_cmp_sum, .Lfunc_end16-.Lfunc_begin16
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i16_using_cmp_notval # -- Begin function unsigned_sat_variable_i16_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_variable_i16_using_cmp_notval, at function
+unsigned_sat_variable_i16_using_cmp_notval: # @unsigned_sat_variable_i16_using_cmp_notval
+.Lfunc_begin17:
+ .cfi_startproc
+# %bb.0:
+ not 6, 4
+ clrlwi 5, 3, 16
+ add 3, 3, 4
+ li 4, -1
+ clrlwi 6, 6, 16
+ cmplw 5, 6
+ iselgt 3, 4, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end17:
+ .size unsigned_sat_variable_i16_using_cmp_notval, .Lfunc_end17-.Lfunc_begin17
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i32_using_min # -- Begin function unsigned_sat_variable_i32_using_min
+ .p2align 4
+ .type unsigned_sat_variable_i32_using_min, at function
+unsigned_sat_variable_i32_using_min: # @unsigned_sat_variable_i32_using_min
+.Lfunc_begin18:
+ .cfi_startproc
+# %bb.0:
+ not 5, 4
+ cmplw 3, 5
+ isellt 3, 3, 5
+ add 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end18:
+ .size unsigned_sat_variable_i32_using_min, .Lfunc_end18-.Lfunc_begin18
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i32_using_cmp_sum # -- Begin function unsigned_sat_variable_i32_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_variable_i32_using_cmp_sum, at function
+unsigned_sat_variable_i32_using_cmp_sum: # @unsigned_sat_variable_i32_using_cmp_sum
+.Lfunc_begin19:
+ .cfi_startproc
+# %bb.0:
+ add 4, 3, 4
+ cmplw 4, 3
+ li 3, -1
+ isellt 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end19:
+ .size unsigned_sat_variable_i32_using_cmp_sum, .Lfunc_end19-.Lfunc_begin19
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i32_using_cmp_notval # -- Begin function unsigned_sat_variable_i32_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_variable_i32_using_cmp_notval, at function
+unsigned_sat_variable_i32_using_cmp_notval: # @unsigned_sat_variable_i32_using_cmp_notval
+.Lfunc_begin20:
+ .cfi_startproc
+# %bb.0:
+ not 5, 4
+ add 4, 3, 4
+ cmplw 3, 5
+ li 3, -1
+ iselgt 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end20:
+ .size unsigned_sat_variable_i32_using_cmp_notval, .Lfunc_end20-.Lfunc_begin20
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i64_using_min # -- Begin function unsigned_sat_variable_i64_using_min
+ .p2align 4
+ .type unsigned_sat_variable_i64_using_min, at function
+unsigned_sat_variable_i64_using_min: # @unsigned_sat_variable_i64_using_min
+.Lfunc_begin21:
+ .cfi_startproc
+# %bb.0:
+ not 5, 4
+ cmpld 3, 5
+ isellt 3, 3, 5
+ add 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end21:
+ .size unsigned_sat_variable_i64_using_min, .Lfunc_end21-.Lfunc_begin21
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i64_using_cmp_sum # -- Begin function unsigned_sat_variable_i64_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_variable_i64_using_cmp_sum, at function
+unsigned_sat_variable_i64_using_cmp_sum: # @unsigned_sat_variable_i64_using_cmp_sum
+.Lfunc_begin22:
+ .cfi_startproc
+# %bb.0:
+ addc 3, 3, 4
+ li 4, 0
+ addze. 4, 4
+ li 4, -1
+ iseleq 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end22:
+ .size unsigned_sat_variable_i64_using_cmp_sum, .Lfunc_end22-.Lfunc_begin22
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_i64_using_cmp_notval # -- Begin function unsigned_sat_variable_i64_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_variable_i64_using_cmp_notval, at function
+unsigned_sat_variable_i64_using_cmp_notval: # @unsigned_sat_variable_i64_using_cmp_notval
+.Lfunc_begin23:
+ .cfi_startproc
+# %bb.0:
+ not 5, 4
+ add 4, 3, 4
+ cmpld 3, 5
+ li 3, -1
+ iselgt 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end23:
+ .size unsigned_sat_variable_i64_using_cmp_notval, .Lfunc_end23-.Lfunc_begin23
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v16i8_using_min
+.LCPI24_0:
+ .space 16,213
+.LCPI24_1:
+ .space 16,42
+ .text
+ .globl unsigned_sat_constant_v16i8_using_min
+ .p2align 4
+ .type unsigned_sat_constant_v16i8_using_min, at function
+unsigned_sat_constant_v16i8_using_min: # @unsigned_sat_constant_v16i8_using_min
+.Lfunc_begin24:
+ .cfi_startproc
+.Lfunc_gep24:
+ addis 2, 12, .TOC.-.Lfunc_gep24 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep24 at l
+.Lfunc_lep24:
+ .localentry unsigned_sat_constant_v16i8_using_min, .Lfunc_lep24-.Lfunc_gep24
+# %bb.0:
+ addis 3, 2, .LCPI24_0 at toc@ha
+ addi 3, 3, .LCPI24_0 at toc@l
+ lxvd2x 35, 0, 3
+ addis 3, 2, .LCPI24_1 at toc@ha
+ addi 3, 3, .LCPI24_1 at toc@l
+ vminub 2, 2, 3
+ lxvd2x 35, 0, 3
+ vaddubm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end24:
+ .size unsigned_sat_constant_v16i8_using_min, .Lfunc_end24-.Lfunc_begin24
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v16i8_using_cmp_sum
+.LCPI25_0:
+ .space 16,42
+ .text
+ .globl unsigned_sat_constant_v16i8_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_constant_v16i8_using_cmp_sum, at function
+unsigned_sat_constant_v16i8_using_cmp_sum: # @unsigned_sat_constant_v16i8_using_cmp_sum
+.Lfunc_begin25:
+ .cfi_startproc
+.Lfunc_gep25:
+ addis 2, 12, .TOC.-.Lfunc_gep25 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep25 at l
+.Lfunc_lep25:
+ .localentry unsigned_sat_constant_v16i8_using_cmp_sum, .Lfunc_lep25-.Lfunc_gep25
+# %bb.0:
+ addis 3, 2, .LCPI25_0 at toc@ha
+ addi 3, 3, .LCPI25_0 at toc@l
+ lxvd2x 35, 0, 3
+ vaddubs 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end25:
+ .size unsigned_sat_constant_v16i8_using_cmp_sum, .Lfunc_end25-.Lfunc_begin25
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v16i8_using_cmp_notval
+.LCPI26_0:
+ .space 16,42
+ .text
+ .globl unsigned_sat_constant_v16i8_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_constant_v16i8_using_cmp_notval, at function
+unsigned_sat_constant_v16i8_using_cmp_notval: # @unsigned_sat_constant_v16i8_using_cmp_notval
+.Lfunc_begin26:
+ .cfi_startproc
+.Lfunc_gep26:
+ addis 2, 12, .TOC.-.Lfunc_gep26 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep26 at l
+.Lfunc_lep26:
+ .localentry unsigned_sat_constant_v16i8_using_cmp_notval, .Lfunc_lep26-.Lfunc_gep26
+# %bb.0:
+ addis 3, 2, .LCPI26_0 at toc@ha
+ addi 3, 3, .LCPI26_0 at toc@l
+ lxvd2x 35, 0, 3
+ vaddubs 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end26:
+ .size unsigned_sat_constant_v16i8_using_cmp_notval, .Lfunc_end26-.Lfunc_begin26
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v8i16_using_min
+.LCPI27_0:
+ .short 65493 # 0xffd5
+ .short 65493 # 0xffd5
+ .short 65493 # 0xffd5
+ .short 65493 # 0xffd5
+ .short 65493 # 0xffd5
+ .short 65493 # 0xffd5
+ .short 65493 # 0xffd5
+ .short 65493 # 0xffd5
+.LCPI27_1:
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .text
+ .globl unsigned_sat_constant_v8i16_using_min
+ .p2align 4
+ .type unsigned_sat_constant_v8i16_using_min, at function
+unsigned_sat_constant_v8i16_using_min: # @unsigned_sat_constant_v8i16_using_min
+.Lfunc_begin27:
+ .cfi_startproc
+.Lfunc_gep27:
+ addis 2, 12, .TOC.-.Lfunc_gep27 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep27 at l
+.Lfunc_lep27:
+ .localentry unsigned_sat_constant_v8i16_using_min, .Lfunc_lep27-.Lfunc_gep27
+# %bb.0:
+ addis 3, 2, .LCPI27_0 at toc@ha
+ addi 3, 3, .LCPI27_0 at toc@l
+ lxvd2x 35, 0, 3
+ addis 3, 2, .LCPI27_1 at toc@ha
+ addi 3, 3, .LCPI27_1 at toc@l
+ vminuh 2, 2, 3
+ lxvd2x 35, 0, 3
+ vadduhm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end27:
+ .size unsigned_sat_constant_v8i16_using_min, .Lfunc_end27-.Lfunc_begin27
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v8i16_using_cmp_sum
+.LCPI28_0:
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .text
+ .globl unsigned_sat_constant_v8i16_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_constant_v8i16_using_cmp_sum, at function
+unsigned_sat_constant_v8i16_using_cmp_sum: # @unsigned_sat_constant_v8i16_using_cmp_sum
+.Lfunc_begin28:
+ .cfi_startproc
+.Lfunc_gep28:
+ addis 2, 12, .TOC.-.Lfunc_gep28 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep28 at l
+.Lfunc_lep28:
+ .localentry unsigned_sat_constant_v8i16_using_cmp_sum, .Lfunc_lep28-.Lfunc_gep28
+# %bb.0:
+ addis 3, 2, .LCPI28_0 at toc@ha
+ addi 3, 3, .LCPI28_0 at toc@l
+ lxvd2x 35, 0, 3
+ vadduhs 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end28:
+ .size unsigned_sat_constant_v8i16_using_cmp_sum, .Lfunc_end28-.Lfunc_begin28
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v8i16_using_cmp_notval
+.LCPI29_0:
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .short 42 # 0x2a
+ .text
+ .globl unsigned_sat_constant_v8i16_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_constant_v8i16_using_cmp_notval, at function
+unsigned_sat_constant_v8i16_using_cmp_notval: # @unsigned_sat_constant_v8i16_using_cmp_notval
+.Lfunc_begin29:
+ .cfi_startproc
+.Lfunc_gep29:
+ addis 2, 12, .TOC.-.Lfunc_gep29 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep29 at l
+.Lfunc_lep29:
+ .localentry unsigned_sat_constant_v8i16_using_cmp_notval, .Lfunc_lep29-.Lfunc_gep29
+# %bb.0:
+ addis 3, 2, .LCPI29_0 at toc@ha
+ addi 3, 3, .LCPI29_0 at toc@l
+ lxvd2x 35, 0, 3
+ vadduhs 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end29:
+ .size unsigned_sat_constant_v8i16_using_cmp_notval, .Lfunc_end29-.Lfunc_begin29
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v4i32_using_min
+.LCPI30_0:
+ .long 4294967253 # 0xffffffd5
+ .long 4294967253 # 0xffffffd5
+ .long 4294967253 # 0xffffffd5
+ .long 4294967253 # 0xffffffd5
+.LCPI30_1:
+ .long 42 # 0x2a
+ .long 42 # 0x2a
+ .long 42 # 0x2a
+ .long 42 # 0x2a
+ .text
+ .globl unsigned_sat_constant_v4i32_using_min
+ .p2align 4
+ .type unsigned_sat_constant_v4i32_using_min, at function
+unsigned_sat_constant_v4i32_using_min: # @unsigned_sat_constant_v4i32_using_min
+.Lfunc_begin30:
+ .cfi_startproc
+.Lfunc_gep30:
+ addis 2, 12, .TOC.-.Lfunc_gep30 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep30 at l
+.Lfunc_lep30:
+ .localentry unsigned_sat_constant_v4i32_using_min, .Lfunc_lep30-.Lfunc_gep30
+# %bb.0:
+ addis 3, 2, .LCPI30_0 at toc@ha
+ addi 3, 3, .LCPI30_0 at toc@l
+ lxvd2x 35, 0, 3
+ addis 3, 2, .LCPI30_1 at toc@ha
+ addi 3, 3, .LCPI30_1 at toc@l
+ vminuw 2, 2, 3
+ lxvd2x 35, 0, 3
+ vadduwm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end30:
+ .size unsigned_sat_constant_v4i32_using_min, .Lfunc_end30-.Lfunc_begin30
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v4i32_using_cmp_sum
+.LCPI31_0:
+ .long 42 # 0x2a
+ .long 42 # 0x2a
+ .long 42 # 0x2a
+ .long 42 # 0x2a
+ .text
+ .globl unsigned_sat_constant_v4i32_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_constant_v4i32_using_cmp_sum, at function
+unsigned_sat_constant_v4i32_using_cmp_sum: # @unsigned_sat_constant_v4i32_using_cmp_sum
+.Lfunc_begin31:
+ .cfi_startproc
+.Lfunc_gep31:
+ addis 2, 12, .TOC.-.Lfunc_gep31 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep31 at l
+.Lfunc_lep31:
+ .localentry unsigned_sat_constant_v4i32_using_cmp_sum, .Lfunc_lep31-.Lfunc_gep31
+# %bb.0:
+ addis 3, 2, .LCPI31_0 at toc@ha
+ addi 3, 3, .LCPI31_0 at toc@l
+ lxvd2x 35, 0, 3
+ vadduws 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end31:
+ .size unsigned_sat_constant_v4i32_using_cmp_sum, .Lfunc_end31-.Lfunc_begin31
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v4i32_using_cmp_notval
+.LCPI32_0:
+ .long 42 # 0x2a
+ .long 42 # 0x2a
+ .long 42 # 0x2a
+ .long 42 # 0x2a
+ .text
+ .globl unsigned_sat_constant_v4i32_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_constant_v4i32_using_cmp_notval, at function
+unsigned_sat_constant_v4i32_using_cmp_notval: # @unsigned_sat_constant_v4i32_using_cmp_notval
+.Lfunc_begin32:
+ .cfi_startproc
+.Lfunc_gep32:
+ addis 2, 12, .TOC.-.Lfunc_gep32 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep32 at l
+.Lfunc_lep32:
+ .localentry unsigned_sat_constant_v4i32_using_cmp_notval, .Lfunc_lep32-.Lfunc_gep32
+# %bb.0:
+ addis 3, 2, .LCPI32_0 at toc@ha
+ addi 3, 3, .LCPI32_0 at toc@l
+ lxvd2x 35, 0, 3
+ vadduws 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end32:
+ .size unsigned_sat_constant_v4i32_using_cmp_notval, .Lfunc_end32-.Lfunc_begin32
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v2i64_using_min
+.LCPI33_0:
+ .quad -43 # 0xffffffffffffffd5
+ .quad -43 # 0xffffffffffffffd5
+.LCPI33_1:
+ .quad 42 # 0x2a
+ .quad 42 # 0x2a
+ .text
+ .globl unsigned_sat_constant_v2i64_using_min
+ .p2align 4
+ .type unsigned_sat_constant_v2i64_using_min, at function
+unsigned_sat_constant_v2i64_using_min: # @unsigned_sat_constant_v2i64_using_min
+.Lfunc_begin33:
+ .cfi_startproc
+.Lfunc_gep33:
+ addis 2, 12, .TOC.-.Lfunc_gep33 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep33 at l
+.Lfunc_lep33:
+ .localentry unsigned_sat_constant_v2i64_using_min, .Lfunc_lep33-.Lfunc_gep33
+# %bb.0:
+ addis 3, 2, .LCPI33_0 at toc@ha
+ addi 3, 3, .LCPI33_0 at toc@l
+ lxvd2x 35, 0, 3
+ addis 3, 2, .LCPI33_1 at toc@ha
+ addi 3, 3, .LCPI33_1 at toc@l
+ vminud 2, 2, 3
+ lxvd2x 35, 0, 3
+ vaddudm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end33:
+ .size unsigned_sat_constant_v2i64_using_min, .Lfunc_end33-.Lfunc_begin33
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v2i64_using_cmp_sum
+.LCPI34_0:
+ .quad 42 # 0x2a
+ .quad 42 # 0x2a
+.LCPI34_1:
+ .quad -43 # 0xffffffffffffffd5
+ .quad -43 # 0xffffffffffffffd5
+ .text
+ .globl unsigned_sat_constant_v2i64_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_constant_v2i64_using_cmp_sum, at function
+unsigned_sat_constant_v2i64_using_cmp_sum: # @unsigned_sat_constant_v2i64_using_cmp_sum
+.Lfunc_begin34:
+ .cfi_startproc
+.Lfunc_gep34:
+ addis 2, 12, .TOC.-.Lfunc_gep34 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep34 at l
+.Lfunc_lep34:
+ .localentry unsigned_sat_constant_v2i64_using_cmp_sum, .Lfunc_lep34-.Lfunc_gep34
+# %bb.0:
+ addis 3, 2, .LCPI34_0 at toc@ha
+ addi 3, 3, .LCPI34_0 at toc@l
+ lxvd2x 35, 0, 3
+ addis 3, 2, .LCPI34_1 at toc@ha
+ addi 3, 3, .LCPI34_1 at toc@l
+ lxvd2x 36, 0, 3
+ vminud 2, 2, 4
+ vaddudm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end34:
+ .size unsigned_sat_constant_v2i64_using_cmp_sum, .Lfunc_end34-.Lfunc_begin34
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v2i64_using_cmp_notval
+.LCPI35_0:
+ .quad 42 # 0x2a
+ .quad 42 # 0x2a
+.LCPI35_1:
+ .quad -43 # 0xffffffffffffffd5
+ .quad -43 # 0xffffffffffffffd5
+ .text
+ .globl unsigned_sat_constant_v2i64_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_constant_v2i64_using_cmp_notval, at function
+unsigned_sat_constant_v2i64_using_cmp_notval: # @unsigned_sat_constant_v2i64_using_cmp_notval
+.Lfunc_begin35:
+ .cfi_startproc
+.Lfunc_gep35:
+ addis 2, 12, .TOC.-.Lfunc_gep35 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep35 at l
+.Lfunc_lep35:
+ .localentry unsigned_sat_constant_v2i64_using_cmp_notval, .Lfunc_lep35-.Lfunc_gep35
+# %bb.0:
+ addis 3, 2, .LCPI35_0 at toc@ha
+ addi 3, 3, .LCPI35_0 at toc@l
+ lxvd2x 35, 0, 3
+ addis 3, 2, .LCPI35_1 at toc@ha
+ addi 3, 3, .LCPI35_1 at toc@l
+ lxvd2x 36, 0, 3
+ vaddudm 3, 2, 3
+ vcmpgtud 2, 2, 4
+ xxlor 34, 34, 35
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end35:
+ .size unsigned_sat_constant_v2i64_using_cmp_notval, .Lfunc_end35-.Lfunc_begin35
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v16i8_using_min # -- Begin function unsigned_sat_variable_v16i8_using_min
+ .p2align 4
+ .type unsigned_sat_variable_v16i8_using_min, at function
+unsigned_sat_variable_v16i8_using_min: # @unsigned_sat_variable_v16i8_using_min
+.Lfunc_begin36:
+ .cfi_startproc
+# %bb.0:
+ xxlnor 36, 35, 35
+ vminub 2, 2, 4
+ vaddubm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end36:
+ .size unsigned_sat_variable_v16i8_using_min, .Lfunc_end36-.Lfunc_begin36
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v16i8_using_cmp_sum # -- Begin function unsigned_sat_variable_v16i8_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_variable_v16i8_using_cmp_sum, at function
+unsigned_sat_variable_v16i8_using_cmp_sum: # @unsigned_sat_variable_v16i8_using_cmp_sum
+.Lfunc_begin37:
+ .cfi_startproc
+# %bb.0:
+ vaddubs 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end37:
+ .size unsigned_sat_variable_v16i8_using_cmp_sum, .Lfunc_end37-.Lfunc_begin37
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v16i8_using_cmp_notval # -- Begin function unsigned_sat_variable_v16i8_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_variable_v16i8_using_cmp_notval, at function
+unsigned_sat_variable_v16i8_using_cmp_notval: # @unsigned_sat_variable_v16i8_using_cmp_notval
+.Lfunc_begin38:
+ .cfi_startproc
+# %bb.0:
+ xxlnor 36, 35, 35
+ vaddubm 3, 2, 3
+ vcmpgtub 2, 2, 4
+ xxlor 34, 34, 35
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end38:
+ .size unsigned_sat_variable_v16i8_using_cmp_notval, .Lfunc_end38-.Lfunc_begin38
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v8i16_using_min # -- Begin function unsigned_sat_variable_v8i16_using_min
+ .p2align 4
+ .type unsigned_sat_variable_v8i16_using_min, at function
+unsigned_sat_variable_v8i16_using_min: # @unsigned_sat_variable_v8i16_using_min
+.Lfunc_begin39:
+ .cfi_startproc
+# %bb.0:
+ xxlnor 36, 35, 35
+ vminuh 2, 2, 4
+ vadduhm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end39:
+ .size unsigned_sat_variable_v8i16_using_min, .Lfunc_end39-.Lfunc_begin39
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v8i16_using_cmp_sum # -- Begin function unsigned_sat_variable_v8i16_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_variable_v8i16_using_cmp_sum, at function
+unsigned_sat_variable_v8i16_using_cmp_sum: # @unsigned_sat_variable_v8i16_using_cmp_sum
+.Lfunc_begin40:
+ .cfi_startproc
+# %bb.0:
+ vadduhs 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end40:
+ .size unsigned_sat_variable_v8i16_using_cmp_sum, .Lfunc_end40-.Lfunc_begin40
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v8i16_using_cmp_notval # -- Begin function unsigned_sat_variable_v8i16_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_variable_v8i16_using_cmp_notval, at function
+unsigned_sat_variable_v8i16_using_cmp_notval: # @unsigned_sat_variable_v8i16_using_cmp_notval
+.Lfunc_begin41:
+ .cfi_startproc
+# %bb.0:
+ xxlnor 36, 35, 35
+ vadduhm 3, 2, 3
+ vcmpgtuh 2, 2, 4
+ xxlor 34, 34, 35
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end41:
+ .size unsigned_sat_variable_v8i16_using_cmp_notval, .Lfunc_end41-.Lfunc_begin41
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v4i32_using_min # -- Begin function unsigned_sat_variable_v4i32_using_min
+ .p2align 4
+ .type unsigned_sat_variable_v4i32_using_min, at function
+unsigned_sat_variable_v4i32_using_min: # @unsigned_sat_variable_v4i32_using_min
+.Lfunc_begin42:
+ .cfi_startproc
+# %bb.0:
+ xxlnor 36, 35, 35
+ vminuw 2, 2, 4
+ vadduwm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end42:
+ .size unsigned_sat_variable_v4i32_using_min, .Lfunc_end42-.Lfunc_begin42
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v4i32_using_cmp_sum # -- Begin function unsigned_sat_variable_v4i32_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_variable_v4i32_using_cmp_sum, at function
+unsigned_sat_variable_v4i32_using_cmp_sum: # @unsigned_sat_variable_v4i32_using_cmp_sum
+.Lfunc_begin43:
+ .cfi_startproc
+# %bb.0:
+ vadduws 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end43:
+ .size unsigned_sat_variable_v4i32_using_cmp_sum, .Lfunc_end43-.Lfunc_begin43
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v4i32_using_cmp_notval # -- Begin function unsigned_sat_variable_v4i32_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_variable_v4i32_using_cmp_notval, at function
+unsigned_sat_variable_v4i32_using_cmp_notval: # @unsigned_sat_variable_v4i32_using_cmp_notval
+.Lfunc_begin44:
+ .cfi_startproc
+# %bb.0:
+ xxlnor 36, 35, 35
+ vadduwm 3, 2, 3
+ vcmpgtuw 2, 2, 4
+ xxlor 34, 34, 35
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end44:
+ .size unsigned_sat_variable_v4i32_using_cmp_notval, .Lfunc_end44-.Lfunc_begin44
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v2i64_using_min # -- Begin function unsigned_sat_variable_v2i64_using_min
+ .p2align 4
+ .type unsigned_sat_variable_v2i64_using_min, at function
+unsigned_sat_variable_v2i64_using_min: # @unsigned_sat_variable_v2i64_using_min
+.Lfunc_begin45:
+ .cfi_startproc
+# %bb.0:
+ xxlnor 36, 35, 35
+ vminud 2, 2, 4
+ vaddudm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end45:
+ .size unsigned_sat_variable_v2i64_using_min, .Lfunc_end45-.Lfunc_begin45
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v2i64_using_cmp_sum # -- Begin function unsigned_sat_variable_v2i64_using_cmp_sum
+ .p2align 4
+ .type unsigned_sat_variable_v2i64_using_cmp_sum, at function
+unsigned_sat_variable_v2i64_using_cmp_sum: # @unsigned_sat_variable_v2i64_using_cmp_sum
+.Lfunc_begin46:
+ .cfi_startproc
+# %bb.0:
+ xxlnor 36, 35, 35
+ vminud 2, 2, 4
+ vaddudm 2, 2, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end46:
+ .size unsigned_sat_variable_v2i64_using_cmp_sum, .Lfunc_end46-.Lfunc_begin46
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_variable_v2i64_using_cmp_notval # -- Begin function unsigned_sat_variable_v2i64_using_cmp_notval
+ .p2align 4
+ .type unsigned_sat_variable_v2i64_using_cmp_notval, at function
+unsigned_sat_variable_v2i64_using_cmp_notval: # @unsigned_sat_variable_v2i64_using_cmp_notval
+.Lfunc_begin47:
+ .cfi_startproc
+# %bb.0:
+ xxlnor 36, 35, 35
+ vaddudm 3, 2, 3
+ vcmpgtud 2, 2, 4
+ xxlor 34, 34, 35
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end47:
+ .size unsigned_sat_variable_v2i64_using_cmp_notval, .Lfunc_end47-.Lfunc_begin47
+ .cfi_endproc
+ # -- End function
+ .section .rodata.cst16,"aM", at progbits,16
+ .p2align 4, 0x0 # -- Begin function sadd
+.LCPI48_0:
+ .quad 0 # 0x0
+ .quad -9223372036854775808 # 0x8000000000000000
+ .text
+ .globl sadd
+ .p2align 4
+ .type sadd, at function
+sadd: # @sadd
+.Lfunc_begin48:
+ .cfi_startproc
+.Lfunc_gep48:
+ addis 2, 12, .TOC.-.Lfunc_gep48 at ha
+ addi 2, 2, .TOC.-.Lfunc_gep48 at l
+.Lfunc_lep48:
+ .localentry sadd, .Lfunc_lep48-.Lfunc_gep48
+# %bb.0:
+ vadduqm 0, 2, 6
+ vadduqm 10, 4, 8
+ mfocrf 12, 32
+ stw 12, 8(1)
+ xxswapd 0, 34
+ xxswapd 4, 36
+ vadduqm 1, 3, 7
+ vadduqm 11, 5, 9
+ mffprd 3, 0
+ mffprd 6, 4
+ lwz 12, 8(1)
+ xxswapd 2, 35
+ xxswapd 5, 37
+ mffprd 4, 2
+ xxswapd 1, 32
+ xxswapd 6, 42
+ mffprd 5, 1
+ cmpld 6, 5, 3
+ mffprd 7, 6
+ xxswapd 3, 33
+ xxswapd 7, 43
+ mffprd 3, 3
+ cmpld 5, 7, 6
+ mffprd 6, 5
+ mffprd 7, 7
+ mfvsrd 5, 36
+ cmpld 3, 4
+ mfvsrd 3, 34
+ cmpld 1, 7, 6
+ mfvsrd 7, 32
+ mfvsrd 4, 35
+ mfvsrd 6, 37
+ cmpld 7, 7, 3
+ cmpd 2, 7, 3
+ mfvsrd 3, 33
+ crandc 21, 8, 30
+ crand 22, 30, 24
+ cmpld 6, 3, 4
+ cmpd 7, 3, 4
+ mfvsrd 4, 42
+ sradi 3, 3, 63
+ mtocrf 32, 12
+ crnor 21, 22, 21
+ crandc 23, 28, 26
+ crand 24, 26, 0
+ cmpld 4, 5
+ cmpd 7, 4, 5
+ mfvsrd 5, 43
+ crnor 22, 24, 23
+ mtfprd 5, 3
+ sradi 4, 4, 63
+ mtfprd 6, 4
+ crandc 25, 28, 2
+ crand 20, 2, 20
+ cmpld 5, 6
+ cmpd 7, 5, 6
+ mfvsrd 6, 38
+ sradi 5, 5, 63
+ crnor 20, 20, 25
+ mtfprd 7, 5
+ sradi 6, 6, 63
+ crandc 26, 28, 2
+ crand 27, 2, 4
+ crnor 23, 27, 26
+ mtfprd 0, 6
+ mfvsrd 6, 39
+ sradi 6, 6, 63
+ mtfprd 1, 6
+ mfvsrd 6, 40
+ sradi 6, 6, 63
+ mtfprd 2, 6
+ mfvsrd 6, 41
+ sradi 6, 6, 63
+ mtfprd 3, 6
+ sradi 6, 7, 63
+ mtfprd 4, 6
+ li 6, -1
+ isel 3, 0, 6, 21
+ isel 4, 0, 6, 22
+ isel 5, 0, 6, 20
+ isel 6, 0, 6, 23
+ mtfprd 8, 3
+ addis 3, 2, .LCPI48_0 at toc@ha
+ mtfprd 10, 4
+ mtfprd 11, 5
+ mtfprd 12, 6
+ addi 3, 3, .LCPI48_0 at toc@l
+ lxvd2x 9, 0, 3
+ xxspltd 45, 6, 0
+ xxspltd 46, 7, 0
+ xxspltd 34, 0, 0
+ xxspltd 40, 5, 0
+ xxspltd 35, 1, 0
+ xxspltd 36, 2, 0
+ xxspltd 38, 3, 0
+ xxspltd 39, 4, 0
+ xxspltd 41, 8, 0
+ xxspltd 44, 10, 0
+ xxspltd 47, 11, 0
+ xxspltd 48, 12, 0
+ xxlxor 0, 34, 41
+ xxlxor 1, 35, 44
+ xxswapd 37, 9
+ xxlxor 2, 39, 37
+ xxlxor 3, 40, 37
+ xxsel 34, 32, 2, 0
+ xxsel 35, 33, 3, 1
+ xxlxor 0, 36, 47
+ xxlxor 1, 45, 37
+ xxsel 36, 42, 1, 0
+ xxlxor 0, 38, 48
+ xxlxor 1, 46, 37
+ xxsel 37, 43, 1, 0
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end48:
+ .size sadd, .Lfunc_end48-.Lfunc_begin48
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i64_with_single_use # -- Begin function unsigned_sat_constant_i64_with_single_use
+ .p2align 4
+ .type unsigned_sat_constant_i64_with_single_use, at function
+unsigned_sat_constant_i64_with_single_use: # @unsigned_sat_constant_i64_with_single_use
+.Lfunc_begin49:
+ .cfi_startproc
+# %bb.0:
+ li 4, 4
+ subc 3, 3, 4
+ li 4, 0
+ addze. 4, 4
+ iseleq 3, 0, 3
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end49:
+ .size unsigned_sat_constant_i64_with_single_use, .Lfunc_end49-.Lfunc_begin49
+ .cfi_endproc
+ # -- End function
+ .globl unsigned_sat_constant_i64_with_multiple_use # -- Begin function unsigned_sat_constant_i64_with_multiple_use
+ .p2align 4
+ .type unsigned_sat_constant_i64_with_multiple_use, at function
+unsigned_sat_constant_i64_with_multiple_use: # @unsigned_sat_constant_i64_with_multiple_use
+.Lfunc_begin50:
+ .cfi_startproc
+# %bb.0:
+ cmpldi 3, 4
+ li 5, 4
+ isellt 5, 3, 5
+ sub 3, 3, 5
+ add 4, 4, 5
+ mulld 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end50:
+ .size unsigned_sat_constant_i64_with_multiple_use, .Lfunc_end50-.Lfunc_begin50
+ .cfi_endproc
+ # -- End function
+ .section ".note.GNU-stack","", at progbits
diff --git a/llvm/test/CodeGen/RISCV/branch-on-zero.ll b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
index b1d396d70ff5f..02aeebdeb3775 100644
--- a/llvm/test/CodeGen/RISCV/branch-on-zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
@@ -8,21 +8,19 @@ define i32 @optbranch_32(i32 %Arg) {
; RV32-LABEL: optbranch_32:
; RV32: # %bb.0: # %bb
; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: beqz a0, .LBB0_2
-; RV32-NEXT: # %bb.1: # %bb3
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB0_2: # %bb2
+; RV32-NEXT: bnez a0, .LBB0_2
+; RV32-NEXT: # %bb.1: # %bb2
; RV32-NEXT: li a0, -1
+; RV32-NEXT: .LBB0_2: # %bb3
; RV32-NEXT: ret
;
; RV64-LABEL: optbranch_32:
; RV64: # %bb.0: # %bb
; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: beqz a0, .LBB0_2
-; RV64-NEXT: # %bb.1: # %bb3
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB0_2: # %bb2
+; RV64-NEXT: bnez a0, .LBB0_2
+; RV64-NEXT: # %bb.1: # %bb2
; RV64-NEXT: li a0, -1
+; RV64-NEXT: .LBB0_2: # %bb3
; RV64-NEXT: ret
bb:
%i1 = icmp eq i32 %Arg, -1
@@ -43,22 +41,20 @@ define i64 @optbranch_64(i64 %Arg) {
; RV32-NEXT: seqz a2, a0
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: or a2, a0, a1
-; RV32-NEXT: beqz a2, .LBB1_2
-; RV32-NEXT: # %bb.1: # %bb3
-; RV32-NEXT: ret
-; RV32-NEXT: .LBB1_2: # %bb2
+; RV32-NEXT: bnez a2, .LBB1_2
+; RV32-NEXT: # %bb.1: # %bb2
; RV32-NEXT: li a0, -1
; RV32-NEXT: li a1, -1
+; RV32-NEXT: .LBB1_2: # %bb3
; RV32-NEXT: ret
;
; RV64-LABEL: optbranch_64:
; RV64: # %bb.0: # %bb
; RV64-NEXT: addi a0, a0, 1
-; RV64-NEXT: beqz a0, .LBB1_2
-; RV64-NEXT: # %bb.1: # %bb3
-; RV64-NEXT: ret
-; RV64-NEXT: .LBB1_2: # %bb2
+; RV64-NEXT: bnez a0, .LBB1_2
+; RV64-NEXT: # %bb.1: # %bb2
; RV64-NEXT: li a0, -1
+; RV64-NEXT: .LBB1_2: # %bb3
; RV64-NEXT: ret
bb:
%i1 = icmp eq i64 %Arg, -1
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index e10b360b35b56..376bbb7018700 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -254,33 +254,60 @@ vector.ph:
}
define <8 x i16> @test3(<8 x i16> %x, i16 zeroext %w) nounwind {
-; SSE-LABEL: test3:
-; SSE: # %bb.0: # %vector.ph
-; SSE-NEXT: movd %edi, %xmm1
-; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; SSE-NEXT: psubusw %xmm1, %xmm0
-; SSE-NEXT: retq
+; SSE2OR3-LABEL: test3:
+; SSE2OR3: # %bb.0: # %vector.ph
+; SSE2OR3-NEXT: movd %edi, %xmm1
+; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm3
+; SSE2OR3-NEXT: pxor %xmm1, %xmm3
+; SSE2OR3-NEXT: psubw %xmm2, %xmm0
+; SSE2OR3-NEXT: pxor %xmm0, %xmm1
+; SSE2OR3-NEXT: pcmpgtw %xmm3, %xmm1
+; SSE2OR3-NEXT: pandn %xmm0, %xmm1
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm0
+; SSE2OR3-NEXT: retq
+;
+; SSE41-LABEL: test3:
+; SSE41: # %bb.0: # %vector.ph
+; SSE41-NEXT: movd %edi, %xmm1
+; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: psubw %xmm1, %xmm2
+; SSE41-NEXT: pminuw %xmm2, %xmm0
+; SSE41-NEXT: pcmpeqw %xmm2, %xmm0
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test3:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpminuw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpcmpeqw %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test3:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm1
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpminuw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpcmpeqw %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test3:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastw %edi, %xmm1
-; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpcmpleuw %xmm0, %xmm1, %k1
+; AVX512-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <8 x i16> undef, i16 %w, i32 0
@@ -332,7 +359,11 @@ define <16 x i8> @test6(<16 x i8> %x, i8 zeroext %w) nounwind {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; SSE2-NEXT: psubusb %xmm1, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: psubb %xmm1, %xmm2
+; SSE2-NEXT: pminub %xmm2, %xmm0
+; SSE2-NEXT: pcmpeqb %xmm2, %xmm0
+; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test6:
@@ -340,7 +371,11 @@ define <16 x i8> @test6(<16 x i8> %x, i8 zeroext %w) nounwind {
; SSSE3-NEXT: movd %edi, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: pshufb %xmm2, %xmm1
-; SSSE3-NEXT: psubusb %xmm1, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: psubb %xmm1, %xmm2
+; SSSE3-NEXT: pminub %xmm2, %xmm0
+; SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
+; SSSE3-NEXT: pand %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test6:
@@ -348,7 +383,11 @@ define <16 x i8> @test6(<16 x i8> %x, i8 zeroext %w) nounwind {
; SSE41-NEXT: movd %edi, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pshufb %xmm2, %xmm1
-; SSE41-NEXT: psubusb %xmm1, %xmm0
+; SSE41-NEXT: movdqa %xmm0, %xmm2
+; SSE41-NEXT: psubb %xmm1, %xmm2
+; SSE41-NEXT: pminub %xmm2, %xmm0
+; SSE41-NEXT: pcmpeqb %xmm2, %xmm0
+; SSE41-NEXT: pand %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test6:
@@ -356,20 +395,28 @@ define <16 x i8> @test6(<16 x i8> %x, i8 zeroext %w) nounwind {
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test6:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm1
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
-; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm1
+; AVX2-NEXT: vpminub %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test6:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastb %edi, %xmm1
-; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm1
+; AVX512-NEXT: vpcmpleub %xmm0, %xmm1, %k1
+; AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <16 x i8> undef, i8 %w, i32 0
@@ -542,14 +589,45 @@ vector.ph:
}
define <16 x i16> @test9(<16 x i16> %x, i16 zeroext %w) nounwind {
-; SSE-LABEL: test9:
-; SSE: # %bb.0: # %vector.ph
-; SSE-NEXT: movd %edi, %xmm2
-; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
-; SSE-NEXT: psubusw %xmm2, %xmm0
-; SSE-NEXT: psubusw %xmm2, %xmm1
-; SSE-NEXT: retq
+; SSE2OR3-LABEL: test9:
+; SSE2OR3: # %bb.0: # %vector.ph
+; SSE2OR3-NEXT: movd %edi, %xmm2
+; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,1,0,1]
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm5
+; SSE2OR3-NEXT: pxor %xmm2, %xmm5
+; SSE2OR3-NEXT: psubw %xmm4, %xmm1
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm3
+; SSE2OR3-NEXT: pxor %xmm2, %xmm3
+; SSE2OR3-NEXT: pcmpgtw %xmm5, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm5
+; SSE2OR3-NEXT: pxor %xmm2, %xmm5
+; SSE2OR3-NEXT: psubw %xmm4, %xmm0
+; SSE2OR3-NEXT: pxor %xmm0, %xmm2
+; SSE2OR3-NEXT: pcmpgtw %xmm5, %xmm2
+; SSE2OR3-NEXT: pandn %xmm0, %xmm2
+; SSE2OR3-NEXT: pandn %xmm1, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm0
+; SSE2OR3-NEXT: movdqa %xmm3, %xmm1
+; SSE2OR3-NEXT: retq
+;
+; SSE41-LABEL: test9:
+; SSE41: # %bb.0: # %vector.ph
+; SSE41-NEXT: movd %edi, %xmm2
+; SSE41-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psubw %xmm2, %xmm3
+; SSE41-NEXT: pminuw %xmm3, %xmm1
+; SSE41-NEXT: pcmpeqw %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: psubw %xmm2, %xmm4
+; SSE41-NEXT: pminuw %xmm4, %xmm0
+; SSE41-NEXT: pcmpeqw %xmm4, %xmm0
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: pand %xmm3, %xmm1
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test9:
; AVX1: # %bb.0: # %vector.ph
@@ -557,22 +635,33 @@ define <16 x i16> @test9(<16 x i16> %x, i16 zeroext %w) nounwind {
; AVX1-NEXT: vmovd %edi, %xmm2
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
-; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpsubw %xmm2, %xmm1, %xmm3
+; AVX1-NEXT: vpminuw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpcmpeqw %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpminuw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vpcmpeqw %xmm0, %xmm2, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test9:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm1
; AVX2-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX2-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpminuw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test9:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastw %edi, %ymm1
-; AVX512-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubw %ymm1, %ymm0, %ymm1
+; AVX512-NEXT: vpcmpleuw %ymm0, %ymm1, %k1
+; AVX512-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <16 x i16> undef, i16 %w, i32 0
@@ -687,8 +776,16 @@ define <32 x i8> @test12(<32 x i8> %x, i8 zeroext %w) nounwind {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
-; SSE2-NEXT: psubusb %xmm2, %xmm0
-; SSE2-NEXT: psubusb %xmm2, %xmm1
+; SSE2-NEXT: movdqa %xmm1, %xmm3
+; SSE2-NEXT: psubb %xmm2, %xmm3
+; SSE2-NEXT: pminub %xmm3, %xmm1
+; SSE2-NEXT: pcmpeqb %xmm3, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: psubb %xmm2, %xmm4
+; SSE2-NEXT: pminub %xmm4, %xmm0
+; SSE2-NEXT: pcmpeqb %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm4, %xmm0
+; SSE2-NEXT: pand %xmm3, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test12:
@@ -696,8 +793,16 @@ define <32 x i8> @test12(<32 x i8> %x, i8 zeroext %w) nounwind {
; SSSE3-NEXT: movd %edi, %xmm2
; SSSE3-NEXT: pxor %xmm3, %xmm3
; SSSE3-NEXT: pshufb %xmm3, %xmm2
-; SSSE3-NEXT: psubusb %xmm2, %xmm0
-; SSSE3-NEXT: psubusb %xmm2, %xmm1
+; SSSE3-NEXT: movdqa %xmm1, %xmm3
+; SSSE3-NEXT: psubb %xmm2, %xmm3
+; SSSE3-NEXT: pminub %xmm3, %xmm1
+; SSSE3-NEXT: pcmpeqb %xmm3, %xmm1
+; SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSSE3-NEXT: psubb %xmm2, %xmm4
+; SSSE3-NEXT: pminub %xmm4, %xmm0
+; SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
+; SSSE3-NEXT: pand %xmm4, %xmm0
+; SSSE3-NEXT: pand %xmm3, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test12:
@@ -705,8 +810,16 @@ define <32 x i8> @test12(<32 x i8> %x, i8 zeroext %w) nounwind {
; SSE41-NEXT: movd %edi, %xmm2
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: pshufb %xmm3, %xmm2
-; SSE41-NEXT: psubusb %xmm2, %xmm0
-; SSE41-NEXT: psubusb %xmm2, %xmm1
+; SSE41-NEXT: movdqa %xmm1, %xmm3
+; SSE41-NEXT: psubb %xmm2, %xmm3
+; SSE41-NEXT: pminub %xmm3, %xmm1
+; SSE41-NEXT: pcmpeqb %xmm3, %xmm1
+; SSE41-NEXT: movdqa %xmm0, %xmm4
+; SSE41-NEXT: psubb %xmm2, %xmm4
+; SSE41-NEXT: pminub %xmm4, %xmm0
+; SSE41-NEXT: pcmpeqb %xmm4, %xmm0
+; SSE41-NEXT: pand %xmm4, %xmm0
+; SSE41-NEXT: pand %xmm3, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: test12:
@@ -715,22 +828,33 @@ define <32 x i8> @test12(<32 x i8> %x, i8 zeroext %w) nounwind {
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsubusb %xmm1, %xmm2, %xmm2
-; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm3
+; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpminub %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test12:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm1
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX2-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpminub %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test12:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastb %edi, %ymm1
-; AVX512-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
+; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm1
+; AVX512-NEXT: vpcmpleub %ymm0, %ymm1, %k1
+; AVX512-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} {z}
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <32 x i8> undef, i8 %w, i32 0
@@ -744,87 +868,122 @@ vector.ph:
define <8 x i16> @test13(<8 x i16> %x, <8 x i32> %y) nounwind {
; SSE2-LABEL: test13:
; SSE2: # %bb.0: # %vector.ph
+; SSE2-NEXT: pxor %xmm3, %xmm3
+; SSE2-NEXT: movdqa %xmm0, %xmm4
+; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm2, %xmm4
-; SSE2-NEXT: pxor %xmm3, %xmm4
-; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
-; SSE2-NEXT: movdqa %xmm5, %xmm6
-; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
-; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
-; SSE2-NEXT: pand %xmm6, %xmm2
-; SSE2-NEXT: pxor %xmm4, %xmm6
-; SSE2-NEXT: por %xmm2, %xmm6
-; SSE2-NEXT: pslld $16, %xmm6
-; SSE2-NEXT: psrad $16, %xmm6
-; SSE2-NEXT: pxor %xmm1, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
-; SSE2-NEXT: pxor %xmm5, %xmm4
-; SSE2-NEXT: pand %xmm1, %xmm5
-; SSE2-NEXT: por %xmm4, %xmm5
-; SSE2-NEXT: pslld $16, %xmm5
-; SSE2-NEXT: psrad $16, %xmm5
-; SSE2-NEXT: packssdw %xmm6, %xmm5
-; SSE2-NEXT: psubusw %xmm5, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm5
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: psubd %xmm2, %xmm0
+; SSE2-NEXT: movdqa %xmm0, %xmm2
+; SSE2-NEXT: pxor %xmm3, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm2
+; SSE2-NEXT: movdqa %xmm4, %xmm5
+; SSE2-NEXT: por %xmm3, %xmm5
+; SSE2-NEXT: psubd %xmm1, %xmm4
+; SSE2-NEXT: pxor %xmm4, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm5, %xmm3
+; SSE2-NEXT: packssdw %xmm2, %xmm3
+; SSE2-NEXT: pslld $16, %xmm0
+; SSE2-NEXT: psrad $16, %xmm0
+; SSE2-NEXT: pslld $16, %xmm4
+; SSE2-NEXT: psrad $16, %xmm4
+; SSE2-NEXT: packssdw %xmm0, %xmm4
+; SSE2-NEXT: pandn %xmm4, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test13:
; SSSE3: # %bb.0: # %vector.ph
+; SSSE3-NEXT: pxor %xmm3, %xmm3
+; SSSE3-NEXT: movdqa %xmm0, %xmm4
+; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
+; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT: movdqa %xmm2, %xmm4
-; SSSE3-NEXT: pxor %xmm3, %xmm4
-; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
-; SSSE3-NEXT: movdqa %xmm5, %xmm6
-; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
-; SSSE3-NEXT: pcmpeqd %xmm4, %xmm4
-; SSSE3-NEXT: pand %xmm6, %xmm2
-; SSSE3-NEXT: pxor %xmm4, %xmm6
-; SSSE3-NEXT: por %xmm2, %xmm6
-; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSSE3-NEXT: pshufb %xmm2, %xmm6
-; SSSE3-NEXT: pxor %xmm1, %xmm3
-; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
-; SSSE3-NEXT: pxor %xmm5, %xmm4
-; SSSE3-NEXT: pand %xmm1, %xmm5
-; SSSE3-NEXT: por %xmm4, %xmm5
-; SSSE3-NEXT: pshufb %xmm2, %xmm5
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
-; SSSE3-NEXT: psubusw %xmm5, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm5
+; SSSE3-NEXT: por %xmm3, %xmm5
+; SSSE3-NEXT: psubd %xmm2, %xmm0
+; SSSE3-NEXT: movdqa %xmm0, %xmm2
+; SSSE3-NEXT: pxor %xmm3, %xmm2
+; SSSE3-NEXT: pcmpgtd %xmm5, %xmm2
+; SSSE3-NEXT: movdqa %xmm4, %xmm5
+; SSSE3-NEXT: por %xmm3, %xmm5
+; SSSE3-NEXT: psubd %xmm1, %xmm4
+; SSSE3-NEXT: pxor %xmm4, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm5, %xmm3
+; SSSE3-NEXT: packssdw %xmm2, %xmm3
+; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm1, %xmm0
+; SSSE3-NEXT: pshufb %xmm1, %xmm4
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0]
+; SSSE3-NEXT: pandn %xmm4, %xmm3
+; SSSE3-NEXT: movdqa %xmm3, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test13:
; SSE41: # %bb.0: # %vector.ph
-; SSE41-NEXT: pmovsxbw {{.*#+}} xmm3 = [65535,0,65535,0,65535,0,65535,0]
-; SSE41-NEXT: pminud %xmm3, %xmm2
-; SSE41-NEXT: pminud %xmm3, %xmm1
-; SSE41-NEXT: packusdw %xmm2, %xmm1
-; SSE41-NEXT: psubusw %xmm1, %xmm0
+; SSE41-NEXT: pxor %xmm3, %xmm3
+; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
+; SSE41-NEXT: movdqa %xmm0, %xmm5
+; SSE41-NEXT: psubd %xmm2, %xmm5
+; SSE41-NEXT: pminud %xmm5, %xmm0
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
+; SSE41-NEXT: movdqa %xmm4, %xmm2
+; SSE41-NEXT: psubd %xmm1, %xmm2
+; SSE41-NEXT: pminud %xmm2, %xmm4
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm4
+; SSE41-NEXT: packssdw %xmm0, %xmm4
+; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0],xmm3[1],xmm5[2],xmm3[3],xmm5[4],xmm3[5],xmm5[6],xmm3[7]
+; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
+; SSE41-NEXT: packusdw %xmm5, %xmm2
+; SSE41-NEXT: pand %xmm4, %xmm2
+; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test13:
; AVX1: # %bb.0: # %vector.ph
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [65535,65535,65535,65535]
-; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
+; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpsubd %xmm4, %xmm3, %xmm4
+; AVX1-NEXT: vpminud %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm1
+; AVX1-NEXT: vpminud %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
+; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
+; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
+; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test13:
; AVX2: # %bb.0: # %vector.ph
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX2-NEXT: vpminud %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpminud %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
+; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
+; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test13:
; AVX512: # %bb.0: # %vector.ph
-; AVX512-NEXT: vpmovusdw %ymm1, %xmm1
-; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
+; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm1
+; AVX512-NEXT: vpcmpleud %ymm0, %ymm1, %k1
+; AVX512-NEXT: vpmovdw %ymm1, %xmm0 {%k1} {z}
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
vector.ph:
@@ -840,80 +999,92 @@ vector.ph:
define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; SSE2OR3-LABEL: test14:
; SSE2OR3: # %bb.0: # %vector.ph
-; SSE2OR3-NEXT: pxor %xmm6, %xmm6
; SSE2OR3-NEXT: movdqa %xmm0, %xmm5
-; SSE2OR3-NEXT: movdqa %xmm4, %xmm7
+; SSE2OR3-NEXT: pxor %xmm0, %xmm0
+; SSE2OR3-NEXT: movdqa %xmm5, %xmm7
+; SSE2OR3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
+; SSE2OR3-NEXT: movdqa %xmm7, %xmm6
+; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3]
+; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
+; SSE2OR3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
+; SSE2OR3-NEXT: movdqa %xmm5, %xmm8
+; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm0[0],xmm8[1],xmm0[1],xmm8[2],xmm0[2],xmm8[3],xmm0[3]
+; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2OR3-NEXT: movdqa %xmm4, %xmm9
+; SSE2OR3-NEXT: pxor %xmm0, %xmm9
+; SSE2OR3-NEXT: psubd %xmm5, %xmm4
+; SSE2OR3-NEXT: movdqa %xmm4, %xmm5
+; SSE2OR3-NEXT: pxor %xmm0, %xmm5
+; SSE2OR3-NEXT: pcmpgtd %xmm9, %xmm5
+; SSE2OR3-NEXT: movdqa %xmm3, %xmm9
+; SSE2OR3-NEXT: pxor %xmm0, %xmm9
+; SSE2OR3-NEXT: psubd %xmm8, %xmm3
; SSE2OR3-NEXT: movdqa %xmm3, %xmm8
-; SSE2OR3-NEXT: movdqa %xmm2, %xmm9
-; SSE2OR3-NEXT: movdqa {{.*#+}} xmm10 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE2OR3-NEXT: pand %xmm10, %xmm4
-; SSE2OR3-NEXT: pand %xmm10, %xmm3
+; SSE2OR3-NEXT: pxor %xmm0, %xmm8
+; SSE2OR3-NEXT: pcmpgtd %xmm9, %xmm8
+; SSE2OR3-NEXT: packssdw %xmm5, %xmm8
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm5
+; SSE2OR3-NEXT: pxor %xmm0, %xmm5
+; SSE2OR3-NEXT: psubd %xmm7, %xmm2
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm7
+; SSE2OR3-NEXT: pxor %xmm0, %xmm7
+; SSE2OR3-NEXT: pcmpgtd %xmm5, %xmm7
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm5
+; SSE2OR3-NEXT: pxor %xmm0, %xmm5
+; SSE2OR3-NEXT: psubd %xmm6, %xmm1
+; SSE2OR3-NEXT: pxor %xmm1, %xmm0
+; SSE2OR3-NEXT: pcmpgtd %xmm5, %xmm0
+; SSE2OR3-NEXT: packssdw %xmm7, %xmm0
+; SSE2OR3-NEXT: packsswb %xmm8, %xmm0
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2OR3-NEXT: pand %xmm5, %xmm4
+; SSE2OR3-NEXT: pand %xmm5, %xmm3
; SSE2OR3-NEXT: packuswb %xmm4, %xmm3
-; SSE2OR3-NEXT: movdqa %xmm1, %xmm4
-; SSE2OR3-NEXT: pand %xmm10, %xmm2
-; SSE2OR3-NEXT: pand %xmm10, %xmm1
+; SSE2OR3-NEXT: pand %xmm5, %xmm2
+; SSE2OR3-NEXT: pand %xmm5, %xmm1
; SSE2OR3-NEXT: packuswb %xmm2, %xmm1
; SSE2OR3-NEXT: packuswb %xmm3, %xmm1
-; SSE2OR3-NEXT: psubb %xmm0, %xmm1
-; SSE2OR3-NEXT: movdqa %xmm0, %xmm2
-; SSE2OR3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3],xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
-; SSE2OR3-NEXT: movdqa %xmm2, %xmm0
-; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
-; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
-; SSE2OR3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm6[8],xmm5[9],xmm6[9],xmm5[10],xmm6[10],xmm5[11],xmm6[11],xmm5[12],xmm6[12],xmm5[13],xmm6[13],xmm5[14],xmm6[14],xmm5[15],xmm6[15]
-; SSE2OR3-NEXT: movdqa %xmm5, %xmm3
-; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
-; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm6[4],xmm5[5],xmm6[5],xmm5[6],xmm6[6],xmm5[7],xmm6[7]
-; SSE2OR3-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2OR3-NEXT: pxor %xmm6, %xmm7
-; SSE2OR3-NEXT: por %xmm6, %xmm5
-; SSE2OR3-NEXT: pcmpgtd %xmm7, %xmm5
-; SSE2OR3-NEXT: pxor %xmm6, %xmm8
-; SSE2OR3-NEXT: por %xmm6, %xmm3
-; SSE2OR3-NEXT: pcmpgtd %xmm8, %xmm3
-; SSE2OR3-NEXT: packssdw %xmm5, %xmm3
-; SSE2OR3-NEXT: pxor %xmm6, %xmm9
-; SSE2OR3-NEXT: por %xmm6, %xmm2
-; SSE2OR3-NEXT: pcmpgtd %xmm9, %xmm2
-; SSE2OR3-NEXT: pxor %xmm6, %xmm4
-; SSE2OR3-NEXT: por %xmm6, %xmm0
-; SSE2OR3-NEXT: pcmpgtd %xmm4, %xmm0
-; SSE2OR3-NEXT: packssdw %xmm2, %xmm0
-; SSE2OR3-NEXT: packsswb %xmm3, %xmm0
; SSE2OR3-NEXT: pandn %xmm1, %xmm0
; SSE2OR3-NEXT: retq
;
; SSE41-LABEL: test14:
; SSE41: # %bb.0: # %vector.ph
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,1,1]
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,2,3]
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[3,3,3,3]
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero
-; SSE41-NEXT: pmaxud %xmm4, %xmm8
-; SSE41-NEXT: pcmpeqd %xmm4, %xmm8
-; SSE41-NEXT: pmaxud %xmm3, %xmm7
-; SSE41-NEXT: pcmpeqd %xmm3, %xmm7
-; SSE41-NEXT: packssdw %xmm8, %xmm7
-; SSE41-NEXT: pmaxud %xmm1, %xmm6
-; SSE41-NEXT: pcmpeqd %xmm1, %xmm6
-; SSE41-NEXT: pmaxud %xmm2, %xmm5
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm5
-; SSE41-NEXT: packssdw %xmm5, %xmm6
-; SSE41-NEXT: packsswb %xmm7, %xmm6
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = [255,255,255,255]
-; SSE41-NEXT: pand %xmm5, %xmm4
-; SSE41-NEXT: pand %xmm5, %xmm3
-; SSE41-NEXT: packusdw %xmm4, %xmm3
-; SSE41-NEXT: pand %xmm5, %xmm2
-; SSE41-NEXT: pand %xmm1, %xmm5
-; SSE41-NEXT: packusdw %xmm2, %xmm5
-; SSE41-NEXT: packuswb %xmm3, %xmm5
-; SSE41-NEXT: psubb %xmm0, %xmm5
-; SSE41-NEXT: pand %xmm6, %xmm5
-; SSE41-NEXT: movdqa %xmm5, %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: movdqa %xmm4, %xmm6
+; SSE41-NEXT: psubd %xmm0, %xmm6
+; SSE41-NEXT: pminud %xmm6, %xmm4
+; SSE41-NEXT: pcmpeqd %xmm6, %xmm4
+; SSE41-NEXT: movdqa %xmm3, %xmm5
+; SSE41-NEXT: psubd %xmm9, %xmm5
+; SSE41-NEXT: pminud %xmm5, %xmm3
+; SSE41-NEXT: pcmpeqd %xmm5, %xmm3
+; SSE41-NEXT: packssdw %xmm4, %xmm3
+; SSE41-NEXT: movdqa %xmm1, %xmm0
+; SSE41-NEXT: psubd %xmm8, %xmm0
+; SSE41-NEXT: pminud %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE41-NEXT: movdqa %xmm2, %xmm4
+; SSE41-NEXT: psubd %xmm7, %xmm4
+; SSE41-NEXT: pminud %xmm4, %xmm2
+; SSE41-NEXT: pcmpeqd %xmm4, %xmm2
+; SSE41-NEXT: packssdw %xmm2, %xmm1
+; SSE41-NEXT: packsswb %xmm3, %xmm1
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
+; SSE41-NEXT: pand %xmm2, %xmm6
+; SSE41-NEXT: pand %xmm2, %xmm5
+; SSE41-NEXT: packusdw %xmm6, %xmm5
+; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: pand %xmm2, %xmm4
+; SSE41-NEXT: packusdw %xmm4, %xmm0
+; SSE41-NEXT: packuswb %xmm5, %xmm0
+; SSE41-NEXT: pand %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test14:
@@ -923,31 +1094,34 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[3,3,3,3]
-; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
-; AVX1-NEXT: vpmaxud %xmm6, %xmm7, %xmm6
-; AVX1-NEXT: vpcmpeqd %xmm6, %xmm7, %xmm6
-; AVX1-NEXT: vpmaxud %xmm5, %xmm2, %xmm5
-; AVX1-NEXT: vpcmpeqd %xmm5, %xmm2, %xmm5
-; AVX1-NEXT: vpackssdw %xmm6, %xmm5, %xmm5
-; AVX1-NEXT: vpmaxud %xmm4, %xmm1, %xmm4
-; AVX1-NEXT: vpcmpeqd %xmm4, %xmm1, %xmm4
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
-; AVX1-NEXT: vpmaxud %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpacksswb %xmm5, %xmm3, %xmm3
-; AVX1-NEXT: vbroadcastss {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
-; AVX1-NEXT: vandps %ymm4, %ymm2, %ymm2
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
-; AVX1-NEXT: vpackusdw %xmm5, %xmm2, %xmm2
-; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vpackusdw %xmm4, %xmm1, %xmm1
-; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
+; AVX1-NEXT: vpsubd %xmm0, %xmm6, %xmm0
+; AVX1-NEXT: vpminud %xmm6, %xmm0, %xmm6
+; AVX1-NEXT: vpcmpeqd %xmm6, %xmm0, %xmm6
+; AVX1-NEXT: vpsubd %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpminud %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpcmpeqd %xmm2, %xmm5, %xmm2
+; AVX1-NEXT: vpackssdw %xmm6, %xmm2, %xmm2
+; AVX1-NEXT: vpsubd %xmm4, %xmm1, %xmm4
+; AVX1-NEXT: vpminud %xmm1, %xmm4, %xmm6
+; AVX1-NEXT: vpcmpeqd %xmm6, %xmm4, %xmm6
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
+; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm3
+; AVX1-NEXT: vpminud %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm3, %xmm1
+; AVX1-NEXT: vpackssdw %xmm1, %xmm6, %xmm1
+; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255]
+; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
+; AVX1-NEXT: vpackusdw %xmm0, %xmm5, %xmm0
+; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm4
+; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
+; AVX1-NEXT: vpackusdw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -955,35 +1129,38 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT: vpmaxud %ymm4, %ymm1, %ymm4
-; AVX2-NEXT: vpcmpeqd %ymm4, %ymm1, %ymm4
-; AVX2-NEXT: vpmaxud %ymm3, %ymm2, %ymm3
-; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm3
-; AVX2-NEXT: vpackssdw %ymm3, %ymm4, %ymm3
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
+; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm1
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX2-NEXT: vpacksswb %xmm4, %xmm3, %xmm3
-; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,2,1,3]
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
-; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1
-; AVX2-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,1,3]
-; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpandn %xmm0, %xmm3, %xmm0
+; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
+; AVX2-NEXT: vpackssdw %xmm5, %xmm1, %xmm1
+; AVX2-NEXT: vpsubd %ymm3, %ymm2, %ymm3
+; AVX2-NEXT: vpminud %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpcmpeqd %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm2
+; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
+; AVX2-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
+; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm2
+; AVX2-NEXT: vpackusdw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
+; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test14:
; AVX512: # %bb.0: # %vector.ph
-; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512-NEXT: vpcmpnltud %zmm2, %zmm1, %k1
-; AVX512-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512-NEXT: vpsubb %xmm0, %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vpsubd %zmm0, %zmm1, %zmm0
+; AVX512-NEXT: vpcmpleud %zmm1, %zmm0, %k1
+; AVX512-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
vector.ph:
@@ -1221,10 +1398,26 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
-; SSE2-NEXT: psubusb %xmm4, %xmm0
-; SSE2-NEXT: psubusb %xmm4, %xmm1
-; SSE2-NEXT: psubusb %xmm4, %xmm2
-; SSE2-NEXT: psubusb %xmm4, %xmm3
+; SSE2-NEXT: movdqa %xmm3, %xmm5
+; SSE2-NEXT: psubb %xmm4, %xmm5
+; SSE2-NEXT: pminub %xmm5, %xmm3
+; SSE2-NEXT: pcmpeqb %xmm5, %xmm3
+; SSE2-NEXT: movdqa %xmm2, %xmm6
+; SSE2-NEXT: psubb %xmm4, %xmm6
+; SSE2-NEXT: pminub %xmm6, %xmm2
+; SSE2-NEXT: pcmpeqb %xmm6, %xmm2
+; SSE2-NEXT: movdqa %xmm1, %xmm7
+; SSE2-NEXT: psubb %xmm4, %xmm7
+; SSE2-NEXT: pminub %xmm7, %xmm1
+; SSE2-NEXT: pcmpeqb %xmm7, %xmm1
+; SSE2-NEXT: movdqa %xmm0, %xmm8
+; SSE2-NEXT: psubb %xmm4, %xmm8
+; SSE2-NEXT: pminub %xmm8, %xmm0
+; SSE2-NEXT: pcmpeqb %xmm8, %xmm0
+; SSE2-NEXT: pand %xmm8, %xmm0
+; SSE2-NEXT: pand %xmm7, %xmm1
+; SSE2-NEXT: pand %xmm6, %xmm2
+; SSE2-NEXT: pand %xmm5, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test17:
@@ -1232,10 +1425,26 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
; SSSE3-NEXT: movd %edi, %xmm4
; SSSE3-NEXT: pxor %xmm5, %xmm5
; SSSE3-NEXT: pshufb %xmm5, %xmm4
-; SSSE3-NEXT: psubusb %xmm4, %xmm0
-; SSSE3-NEXT: psubusb %xmm4, %xmm1
-; SSSE3-NEXT: psubusb %xmm4, %xmm2
-; SSSE3-NEXT: psubusb %xmm4, %xmm3
+; SSSE3-NEXT: movdqa %xmm3, %xmm5
+; SSSE3-NEXT: psubb %xmm4, %xmm5
+; SSSE3-NEXT: pminub %xmm5, %xmm3
+; SSSE3-NEXT: pcmpeqb %xmm5, %xmm3
+; SSSE3-NEXT: movdqa %xmm2, %xmm6
+; SSSE3-NEXT: psubb %xmm4, %xmm6
+; SSSE3-NEXT: pminub %xmm6, %xmm2
+; SSSE3-NEXT: pcmpeqb %xmm6, %xmm2
+; SSSE3-NEXT: movdqa %xmm1, %xmm7
+; SSSE3-NEXT: psubb %xmm4, %xmm7
+; SSSE3-NEXT: pminub %xmm7, %xmm1
+; SSSE3-NEXT: pcmpeqb %xmm7, %xmm1
+; SSSE3-NEXT: movdqa %xmm0, %xmm8
+; SSSE3-NEXT: psubb %xmm4, %xmm8
+; SSSE3-NEXT: pminub %xmm8, %xmm0
+; SSSE3-NEXT: pcmpeqb %xmm8, %xmm0
+; SSSE3-NEXT: pand %xmm8, %xmm0
+; SSSE3-NEXT: pand %xmm7, %xmm1
+; SSSE3-NEXT: pand %xmm6, %xmm2
+; SSSE3-NEXT: pand %xmm5, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test17:
@@ -1243,10 +1452,26 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
; SSE41-NEXT: movd %edi, %xmm4
; SSE41-NEXT: pxor %xmm5, %xmm5
; SSE41-NEXT: pshufb %xmm5, %xmm4
-; SSE41-NEXT: psubusb %xmm4, %xmm0
-; SSE41-NEXT: psubusb %xmm4, %xmm1
-; SSE41-NEXT: psubusb %xmm4, %xmm2
-; SSE41-NEXT: psubusb %xmm4, %xmm3
+; SSE41-NEXT: movdqa %xmm3, %xmm5
+; SSE41-NEXT: psubb %xmm4, %xmm5
+; SSE41-NEXT: pminub %xmm5, %xmm3
+; SSE41-NEXT: pcmpeqb %xmm5, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm6
+; SSE41-NEXT: psubb %xmm4, %xmm6
+; SSE41-NEXT: pminub %xmm6, %xmm2
+; SSE41-NEXT: pcmpeqb %xmm6, %xmm2
+; SSE41-NEXT: movdqa %xmm1, %xmm7
+; SSE41-NEXT: psubb %xmm4, %xmm7
+; SSE41-NEXT: pminub %xmm7, %xmm1
+; SSE41-NEXT: pcmpeqb %xmm7, %xmm1
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: psubb %xmm4, %xmm8
+; SSE41-NEXT: pminub %xmm8, %xmm0
+; SSE41-NEXT: pcmpeqb %xmm8, %xmm0
+; SSE41-NEXT: pand %xmm8, %xmm0
+; SSE41-NEXT: pand %xmm7, %xmm1
+; SSE41-NEXT: pand %xmm6, %xmm2
+; SSE41-NEXT: pand %xmm5, %xmm3
; SSE41-NEXT: retq
;
; AVX1-LABEL: test17:
@@ -1254,28 +1479,48 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
; AVX1-NEXT: vmovd %edi, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpsubusb %xmm2, %xmm3, %xmm3
-; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpsubusb %xmm2, %xmm3, %xmm3
-; AVX1-NEXT: vpsubusb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm4
+; AVX1-NEXT: vpminub %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm5
+; AVX1-NEXT: vpminub %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
+; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm6
+; AVX1-NEXT: vpminub %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpcmpeqb %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm2
+; AVX1-NEXT: vpminub %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vpcmpeqb %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm3
+; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test17:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
-; AVX2-NEXT: vpsubusb %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsubusb %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsubb %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpminub %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpcmpeqb %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpsubb %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpminub %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test17:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastb %edi, %zmm1
-; AVX512-NEXT: vpsubusb %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm1
+; AVX512-NEXT: vpcmpleub %zmm0, %zmm1, %k1
+; AVX512-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <64 x i8> undef, i8 %w, i32 0
@@ -1287,44 +1532,119 @@ vector.ph:
}
define <32 x i16> @test18(<32 x i16> %x, i16 zeroext %w) nounwind {
-; SSE-LABEL: test18:
-; SSE: # %bb.0: # %vector.ph
-; SSE-NEXT: movd %edi, %xmm4
-; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,0,0,4,5,6,7]
-; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
-; SSE-NEXT: psubusw %xmm4, %xmm0
-; SSE-NEXT: psubusw %xmm4, %xmm1
-; SSE-NEXT: psubusw %xmm4, %xmm2
-; SSE-NEXT: psubusw %xmm4, %xmm3
-; SSE-NEXT: retq
+; SSE2OR3-LABEL: test18:
+; SSE2OR3: # %bb.0: # %vector.ph
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm5
+; SSE2OR3-NEXT: movd %edi, %xmm0
+; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
+; SSE2OR3-NEXT: pshufd {{.*#+}} xmm8 = xmm0[0,1,0,1]
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE2OR3-NEXT: movdqa %xmm3, %xmm6
+; SSE2OR3-NEXT: pxor %xmm0, %xmm6
+; SSE2OR3-NEXT: psubw %xmm8, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm3, %xmm4
+; SSE2OR3-NEXT: pxor %xmm0, %xmm4
+; SSE2OR3-NEXT: pcmpgtw %xmm6, %xmm4
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm7
+; SSE2OR3-NEXT: pxor %xmm0, %xmm7
+; SSE2OR3-NEXT: psubw %xmm8, %xmm2
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm6
+; SSE2OR3-NEXT: pxor %xmm0, %xmm6
+; SSE2OR3-NEXT: pcmpgtw %xmm7, %xmm6
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm9
+; SSE2OR3-NEXT: pxor %xmm0, %xmm9
+; SSE2OR3-NEXT: psubw %xmm8, %xmm1
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm7
+; SSE2OR3-NEXT: pxor %xmm0, %xmm7
+; SSE2OR3-NEXT: pcmpgtw %xmm9, %xmm7
+; SSE2OR3-NEXT: movdqa %xmm5, %xmm9
+; SSE2OR3-NEXT: pxor %xmm0, %xmm9
+; SSE2OR3-NEXT: psubw %xmm8, %xmm5
+; SSE2OR3-NEXT: pxor %xmm5, %xmm0
+; SSE2OR3-NEXT: pcmpgtw %xmm9, %xmm0
+; SSE2OR3-NEXT: pandn %xmm5, %xmm0
+; SSE2OR3-NEXT: pandn %xmm1, %xmm7
+; SSE2OR3-NEXT: pandn %xmm2, %xmm6
+; SSE2OR3-NEXT: pandn %xmm3, %xmm4
+; SSE2OR3-NEXT: movdqa %xmm7, %xmm1
+; SSE2OR3-NEXT: movdqa %xmm6, %xmm2
+; SSE2OR3-NEXT: movdqa %xmm4, %xmm3
+; SSE2OR3-NEXT: retq
+;
+; SSE41-LABEL: test18:
+; SSE41: # %bb.0: # %vector.ph
+; SSE41-NEXT: movd %edi, %xmm4
+; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,0,0,4,5,6,7]
+; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
+; SSE41-NEXT: movdqa %xmm3, %xmm5
+; SSE41-NEXT: psubw %xmm4, %xmm5
+; SSE41-NEXT: pminuw %xmm5, %xmm3
+; SSE41-NEXT: pcmpeqw %xmm5, %xmm3
+; SSE41-NEXT: movdqa %xmm2, %xmm6
+; SSE41-NEXT: psubw %xmm4, %xmm6
+; SSE41-NEXT: pminuw %xmm6, %xmm2
+; SSE41-NEXT: pcmpeqw %xmm6, %xmm2
+; SSE41-NEXT: movdqa %xmm1, %xmm7
+; SSE41-NEXT: psubw %xmm4, %xmm7
+; SSE41-NEXT: pminuw %xmm7, %xmm1
+; SSE41-NEXT: pcmpeqw %xmm7, %xmm1
+; SSE41-NEXT: movdqa %xmm0, %xmm8
+; SSE41-NEXT: psubw %xmm4, %xmm8
+; SSE41-NEXT: pminuw %xmm8, %xmm0
+; SSE41-NEXT: pcmpeqw %xmm8, %xmm0
+; SSE41-NEXT: pand %xmm8, %xmm0
+; SSE41-NEXT: pand %xmm7, %xmm1
+; SSE41-NEXT: pand %xmm6, %xmm2
+; SSE41-NEXT: pand %xmm5, %xmm3
+; SSE41-NEXT: retq
;
; AVX1-LABEL: test18:
; AVX1: # %bb.0: # %vector.ph
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT: vmovd %edi, %xmm3
; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1]
-; AVX1-NEXT: vpsubusw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsubusw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
-; AVX1-NEXT: vpsubusw %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vpsubusw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpsubw %xmm3, %xmm2, %xmm4
+; AVX1-NEXT: vpminuw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm4, %xmm2
+; AVX1-NEXT: vpsubw %xmm3, %xmm1, %xmm5
+; AVX1-NEXT: vpminuw %xmm1, %xmm5, %xmm1
+; AVX1-NEXT: vpcmpeqw %xmm1, %xmm5, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
+; AVX1-NEXT: vpsubw %xmm3, %xmm2, %xmm6
+; AVX1-NEXT: vpminuw %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpcmpeqw %xmm2, %xmm6, %xmm2
+; AVX1-NEXT: vpsubw %xmm3, %xmm0, %xmm3
+; AVX1-NEXT: vpminuw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpcmpeqw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm2
+; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm3, %ymm2
+; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test18:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX2-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpsubusw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vpsubw %ymm2, %ymm1, %ymm3
+; AVX2-NEXT: vpminuw %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
+; AVX2-NEXT: vpsubw %ymm2, %ymm0, %ymm2
+; AVX2-NEXT: vpminuw %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
+; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test18:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastw %edi, %zmm1
-; AVX512-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
+; AVX512-NEXT: vpsubw %zmm1, %zmm0, %zmm1
+; AVX512-NEXT: vpcmpleuw %zmm0, %zmm1, %k1
+; AVX512-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z}
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <32 x i16> undef, i16 %w, i32 0
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 1b307b30d8c0d..4e31b48ec5cec 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -2065,11 +2065,10 @@ define i64 @PR51612(i64 %x, i64 %y) {
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %edx
-; ATHLON-NEXT: addl $1, %ecx
-; ATHLON-NEXT: adcl $0, %edx
-; ATHLON-NEXT: incl %eax
-; ATHLON-NEXT: orl %ecx, %edx
-; ATHLON-NEXT: cmovnel %ecx, %eax
+; ATHLON-NEXT: incl %edx
+; ATHLON-NEXT: addl $1, %eax
+; ATHLON-NEXT: adcl $0, %ecx
+; ATHLON-NEXT: cmovbl %edx, %eax
; ATHLON-NEXT: andl 10, %eax
; ATHLON-NEXT: xorl %edx, %edx
; ATHLON-NEXT: retl
@@ -2078,8 +2077,7 @@ define i64 @PR51612(i64 %x, i64 %y) {
; MCU: # %bb.0:
; MCU-NEXT: addl $1, %eax
; MCU-NEXT: adcl $0, %edx
-; MCU-NEXT: orl %eax, %edx
-; MCU-NEXT: jne .LBB45_2
+; MCU-NEXT: jae .LBB45_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax
; MCU-NEXT: incl %eax
diff --git a/test_direct_uaddo.ll b/test_direct_uaddo.ll
new file mode 100644
index 0000000000000..a923d212bbf90
--- /dev/null
+++ b/test_direct_uaddo.ll
@@ -0,0 +1,9 @@
+define i32 @test_direct_uaddo(i32 %x, i32 %y) {
+ %result = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+ %ovf = extractvalue {i32, i1} %result, 1
+ %val = extractvalue {i32, i1} %result, 0
+ %sel = select i1 %ovf, i32 -1, i32 %val
+ ret i32 %sel
+}
+
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
diff --git a/test_sat_pattern.ll b/test_sat_pattern.ll
new file mode 100644
index 0000000000000..150c8081a77ac
--- /dev/null
+++ b/test_sat_pattern.ll
@@ -0,0 +1,6 @@
+define <8 x i16> @test_sat_pattern(<8 x i16> %x, <8 x i16> %y) {
+ %a = add <8 x i16> %x, %y
+ %c = icmp ugt <8 x i16> %x, %a
+ %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
+ ret <8 x i16> %r
+}
diff --git a/test_sat_pattern.s b/test_sat_pattern.s
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/test_scalar_sat.ll b/test_scalar_sat.ll
new file mode 100644
index 0000000000000..6ef9729e66a75
--- /dev/null
+++ b/test_scalar_sat.ll
@@ -0,0 +1,6 @@
+define i8 @test_scalar_sat(i8 %x) {
+ %a = add i8 %x, 42
+ %c = icmp ugt i8 %x, %a
+ %r = select i1 %c, i8 -1, i8 %a
+ ret i8 %r
+}
diff --git a/test_uaddo_conversion.ll b/test_uaddo_conversion.ll
new file mode 100644
index 0000000000000..ca433863997b7
--- /dev/null
+++ b/test_uaddo_conversion.ll
@@ -0,0 +1,9 @@
+define i32 @test_uaddo_to_uaddsat(i32 %x, i32 %y) {
+ %result = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+ %val = extractvalue {i32, i1} %result, 0
+ %ovf = extractvalue {i32, i1} %result, 1
+ %sel = select i1 %ovf, i32 -1, i32 %val
+ ret i32 %sel
+}
+
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
diff --git a/test_uaddo_only.ll b/test_uaddo_only.ll
new file mode 100644
index 0000000000000..4f7056148fa99
--- /dev/null
+++ b/test_uaddo_only.ll
@@ -0,0 +1,6 @@
+define i32 @test_uaddo_only(i32 %x, i32 %y) {
+ %add = add i32 %x, %y
+ %cmp = icmp ugt i32 %x, %add
+ %sel = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %sel
+}
diff --git a/test_uaddo_only.s b/test_uaddo_only.s
new file mode 100644
index 0000000000000..e04ea329bd8e9
--- /dev/null
+++ b/test_uaddo_only.s
@@ -0,0 +1,22 @@
+ .abiversion 2
+ .file "test_uaddo_only.ll"
+ .text
+ .globl test_uaddo_only # -- Begin function test_uaddo_only
+ .p2align 4
+ .type test_uaddo_only,@function
+test_uaddo_only: # @test_uaddo_only
+.Lfunc_begin0:
+ .cfi_startproc
+# %bb.0:
+ add 4, 3, 4
+ cmplw 4, 3
+ li 3, -1
+ isellt 3, 3, 4
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size test_uaddo_only, .Lfunc_end0-.Lfunc_begin0
+ .cfi_endproc
+ # -- End function
+ .section ".note.GNU-stack","", at progbits
diff --git a/test_uaddsat.ll b/test_uaddsat.ll
new file mode 100644
index 0000000000000..0c5423504fb48
--- /dev/null
+++ b/test_uaddsat.ll
@@ -0,0 +1,9 @@
+; Test file to verify uaddo -> uaddsat conversion
+define i32 @test_uaddsat_pattern(i32 %x, i32 %y) {
+ %add = add i32 %x, %y
+ %cmp = icmp ugt i32 %x, %add
+ %sel = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %sel
+}
+
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
diff --git a/test_usubo.ll b/test_usubo.ll
new file mode 100644
index 0000000000000..e588f43f3cec9
--- /dev/null
+++ b/test_usubo.ll
@@ -0,0 +1,15 @@
+; Test file to verify usubo -> usubsat conversion
+define i32 @test_usubo_to_usubsat(i32 %x, i32 %y) {
+ %result = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
+ %val = extractvalue {i32, i1} %result, 0
+ ret i32 %val
+}
+
+define i32 @test_uaddo_to_uaddsat(i32 %x, i32 %y) {
+ %result = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
+ %val = extractvalue {i32, i1} %result, 0
+ ret i32 %val
+}
+
+declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)
+declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
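
For the usub side, the comment above mentions the usubo -> usubsat conversion; the
complete saturating pattern also clamps the difference to zero on underflow. A minimal
sketch of that shape (illustrative only, not part of the patch):

define i32 @usubsat_via_select(i32 %x, i32 %y) {
  ; unsigned underflow happens iff x < y; clamping the result to 0 in that case
  ; yields the same value as @llvm.usub.sat.i32(%x, %y)
  %sub = sub i32 %x, %y
  %cmp = icmp ult i32 %x, %y
  %sel = select i1 %cmp, i32 0, i32 %sub
  ret i32 %sel
}
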
diff --git a/test_vector_uaddo.ll b/test_vector_uaddo.ll
new file mode 100644
index 0000000000000..8105ed0041f54
--- /dev/null
+++ b/test_vector_uaddo.ll
@@ -0,0 +1,9 @@
+define <8 x i16> @test_vector_uaddo(<8 x i16> %x, <8 x i16> %y) {
+ %result = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> %x, <8 x i16> %y)
+ %ovf = extractvalue { <8 x i16>, <8 x i1> } %result, 1
+ %val = extractvalue { <8 x i16>, <8 x i1> } %result, 0
+ %sel = select <8 x i1> %ovf, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %val
+ ret <8 x i16> %sel
+}
+
+declare { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16>, <8 x i16>)
diff --git a/test_vector_uaddo.s b/test_vector_uaddo.s
new file mode 100644
index 0000000000000..5834fc58aa562
--- /dev/null
+++ b/test_vector_uaddo.s
@@ -0,0 +1,21 @@
+ .abiversion 2
+ .file "test_vector_uaddo.ll"
+ .text
+ .globl test_vector_uaddo # -- Begin function test_vector_uaddo
+ .p2align 4
+ .type test_vector_uaddo,@function
+test_vector_uaddo: # @test_vector_uaddo
+.Lfunc_begin0:
+ .cfi_startproc
+# %bb.0:
+ vadduhm 3, 2, 3
+ vcmpgtuh 2, 2, 3
+ xxlor 34, 34, 35
+ blr
+ .long 0
+ .quad 0
+.Lfunc_end0:
+ .size test_vector_uaddo, .Lfunc_end0-.Lfunc_begin0
+ .cfi_endproc
+ # -- End function
+ .section ".note.GNU-stack","", at progbits
diff --git a/trace_uaddsat.ll b/trace_uaddsat.ll
new file mode 100644
index 0000000000000..8fccd2816d67f
--- /dev/null
+++ b/trace_uaddsat.ll
@@ -0,0 +1,6 @@
+define i32 @test_uaddsat(i32 %x, i32 %y) {
+ %add = add i32 %x, %y
+ %cmp = icmp ugt i32 %x, %add
+ %sel = select i1 %cmp, i32 -1, i32 %add
+ ret i32 %sel
+}
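
The select-based scratch tests above are the expanded form of the dedicated saturating-add
intrinsic; for comparison, a minimal equivalent written with the intrinsic directly
(illustrative only, not part of the patch):

define i32 @uaddsat_intrinsic_form(i32 %x, i32 %y) {
  ; @llvm.uadd.sat.i32 returns x+y clamped to UINT32_MAX on unsigned overflow,
  ; i.e. the same value as: overflow ? -1 : x+y
  %sat = call i32 @llvm.uadd.sat.i32(i32 %x, i32 %y)
  ret i32 %sat
}

declare i32 @llvm.uadd.sat.i32(i32, i32)
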
>From 91a0b2ef8f29f8fc2fd08604689d8e3c8f2c92d3 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 24 Sep 2025 12:34:21 -0400
Subject: [PATCH 05/12] [DAGCombiner] Fold vselect of uaddo overflow into uaddsat
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 25 ++++++++++++
llvm/test/CodeGen/PowerPC/sat-add.ll | 38 +++++++------------
2 files changed, 39 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a6ba6e518899f..db2fc895cf09f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13347,6 +13347,31 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
}
+ // vselect uaddo(x, y).overflow, -1, uaddo(x, y) -> uaddsat(x, y)
+ // This converts the pattern created by CodeGenPrepare back to uaddsat
+ // Handle the case where overflow might be sign-extended
+ if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
+ // Look through sign_extend_inreg to find the actual overflow flag
+ (void)N0.getOperand(0);
+ if ((N1.getOpcode() == ISD::UADDO && N1.getResNo() == 0 &&
+ ISD::isConstantSplatVectorAllOnes(N2.getNode())) ||
+ (N2.getOpcode() == ISD::UADDO && N2.getResNo() == 0 &&
+ ISD::isConstantSplatVectorAllOnes(N1.getNode()))) {
+ LLVM_DEBUG(dbgs() << "Converting uaddo to uaddsat\n");
+ return DAG.getNode(ISD::UADDSAT, DL, VT,
+ N1.getOpcode() == ISD::UADDO ? N1.getOperand(0) : N2.getOperand(0),
+ N1.getOpcode() == ISD::UADDO ? N1.getOperand(1) : N2.getOperand(1));
+ }
+ } else if ((N1.getOpcode() == ISD::UADDO && N1.getResNo() == 0 &&
+ ISD::isConstantSplatVectorAllOnes(N2.getNode())) ||
+ (N2.getOpcode() == ISD::UADDO && N2.getResNo() == 0 &&
+ ISD::isConstantSplatVectorAllOnes(N1.getNode()))) {
+ LLVM_DEBUG(dbgs() << "Converting uaddo to uaddsat\n");
+ return DAG.getNode(ISD::UADDSAT, DL, VT,
+ N1.getOpcode() == ISD::UADDO ? N1.getOperand(0) : N2.getOperand(0),
+ N1.getOpcode() == ISD::UADDO ? N1.getOperand(1) : N2.getOperand(1));
+ }
+
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
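
For reference, a minimal IR sketch of the shape this fold targets (mirroring
test_sat_pattern.ll from the scratch tests above; the function name is illustrative only):

define <8 x i16> @uaddsat_via_vselect(<8 x i16> %x, <8 x i16> %y) {
  ; once the add+compare is recognized as a uaddo node (the comment in the fold above
  ; attributes this to CodeGenPrepare), the DAG sees vselect(uaddo.ovf, -1, uaddo.val)
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}

With the fold in place this becomes ISD::UADDSAT, which is why the v8i16 cases in the
PowerPC sat-add.ll checks below collapse to a single vadduhs.
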
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index 012f03f0b884c..f04ef6d329bce 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -400,9 +400,7 @@ define <16 x i8> @unsigned_sat_constant_v16i8_using_cmp_sum(<16 x i8> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI25_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI25_0@toc@l
; CHECK-NEXT: lxvd2x 35, 0, 3
-; CHECK-NEXT: vaddubm 3, 2, 3
-; CHECK-NEXT: vcmpgtub 2, 2, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vaddubs 2, 2, 3
; CHECK-NEXT: blr
%a = add <16 x i8> %x, <i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>
%c = icmp ugt <16 x i8> %x, %a
@@ -448,9 +446,7 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_cmp_sum(<8 x i16> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI28_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI28_0@toc@l
; CHECK-NEXT: lxvd2x 35, 0, 3
-; CHECK-NEXT: vadduhm 3, 2, 3
-; CHECK-NEXT: vcmpgtuh 2, 2, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vadduhs 2, 2, 3
; CHECK-NEXT: blr
%a = add <8 x i16> %x, <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>
%c = icmp ugt <8 x i16> %x, %a
@@ -496,9 +492,7 @@ define <4 x i32> @unsigned_sat_constant_v4i32_using_cmp_sum(<4 x i32> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI31_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI31_0@toc@l
; CHECK-NEXT: lxvd2x 35, 0, 3
-; CHECK-NEXT: vadduwm 3, 2, 3
-; CHECK-NEXT: vcmpgtuw 2, 2, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vadduws 2, 2, 3
; CHECK-NEXT: blr
%a = add <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42>
%c = icmp ugt <4 x i32> %x, %a
@@ -544,9 +538,11 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI34_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI34_0@toc@l
; CHECK-NEXT: lxvd2x 35, 0, 3
-; CHECK-NEXT: vaddudm 3, 2, 3
-; CHECK-NEXT: vcmpgtud 2, 2, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: addis 3, 2, .LCPI34_1@toc@ha
+; CHECK-NEXT: addi 3, 3, .LCPI34_1@toc@l
+; CHECK-NEXT: lxvd2x 36, 0, 3
+; CHECK-NEXT: vminud 2, 2, 4
+; CHECK-NEXT: vaddudm 2, 2, 3
; CHECK-NEXT: blr
%a = add <2 x i64> %x, <i64 42, i64 42>
%c = icmp ugt <2 x i64> %x, %a
@@ -590,9 +586,7 @@ define <16 x i8> @unsigned_sat_variable_v16i8_using_min(<16 x i8> %x, <16 x i8>
define <16 x i8> @unsigned_sat_variable_v16i8_using_cmp_sum(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: unsigned_sat_variable_v16i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: vaddubm 3, 2, 3
-; CHECK-NEXT: vcmpgtub 2, 2, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vaddubs 2, 2, 3
; CHECK-NEXT: blr
%a = add <16 x i8> %x, %y
%c = icmp ugt <16 x i8> %x, %a
@@ -632,9 +626,7 @@ define <8 x i16> @unsigned_sat_variable_v8i16_using_min(<8 x i16> %x, <8 x i16>
define <8 x i16> @unsigned_sat_variable_v8i16_using_cmp_sum(<8 x i16> %x, <8 x i16> %y) {
; CHECK-LABEL: unsigned_sat_variable_v8i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: vadduhm 3, 2, 3
-; CHECK-NEXT: vcmpgtuh 2, 2, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vadduhs 2, 2, 3
; CHECK-NEXT: blr
%a = add <8 x i16> %x, %y
%c = icmp ugt <8 x i16> %x, %a
@@ -674,9 +666,7 @@ define <4 x i32> @unsigned_sat_variable_v4i32_using_min(<4 x i32> %x, <4 x i32>
define <4 x i32> @unsigned_sat_variable_v4i32_using_cmp_sum(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: unsigned_sat_variable_v4i32_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: vadduwm 3, 2, 3
-; CHECK-NEXT: vcmpgtuw 2, 2, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: vadduws 2, 2, 3
; CHECK-NEXT: blr
%a = add <4 x i32> %x, %y
%c = icmp ugt <4 x i32> %x, %a
@@ -716,9 +706,9 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64>
define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: vaddudm 3, 2, 3
-; CHECK-NEXT: vcmpgtud 2, 2, 3
-; CHECK-NEXT: xxlor 34, 34, 35
+; CHECK-NEXT: xxlnor 36, 35, 35
+; CHECK-NEXT: vminud 2, 2, 4
+; CHECK-NEXT: vaddudm 2, 2, 3
; CHECK-NEXT: blr
%a = add <2 x i64> %x, %y
%c = icmp ugt <2 x i64> %x, %a
>From c2d26fc6ca2d9825f053604936f8922bb7e4acea Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 24 Sep 2025 12:41:41 -0400
Subject: [PATCH 06/12] Revert "f"
This reverts commit 1342b3027025367dfcaf61d13ee1ce1f6ecd9739.
---
llvm/test/CodeGen/PowerPC/sat-add.ll | 38 +-
llvm/test/CodeGen/PowerPC/sat-add.s | 1260 ---------------------
llvm/test/CodeGen/RISCV/branch-on-zero.ll | 28 +-
llvm/test/CodeGen/X86/psubus.ll | 814 ++++---------
llvm/test/CodeGen/X86/select.ll | 12 +-
test_direct_uaddo.ll | 9 -
test_sat_pattern.ll | 6 -
test_sat_pattern.s | 0
test_scalar_sat.ll | 6 -
test_uaddo_conversion.ll | 9 -
test_uaddo_only.ll | 6 -
test_uaddo_only.s | 22 -
test_uaddsat.ll | 9 -
test_usubo.ll | 15 -
test_vector_uaddo.ll | 9 -
test_vector_uaddo.s | 21 -
trace_uaddsat.ll | 6 -
17 files changed, 290 insertions(+), 1980 deletions(-)
delete mode 100644 llvm/test/CodeGen/PowerPC/sat-add.s
delete mode 100644 test_direct_uaddo.ll
delete mode 100644 test_sat_pattern.ll
delete mode 100644 test_sat_pattern.s
delete mode 100644 test_scalar_sat.ll
delete mode 100644 test_uaddo_conversion.ll
delete mode 100644 test_uaddo_only.ll
delete mode 100644 test_uaddo_only.s
delete mode 100644 test_uaddsat.ll
delete mode 100644 test_usubo.ll
delete mode 100644 test_vector_uaddo.ll
delete mode 100644 test_vector_uaddo.s
delete mode 100644 trace_uaddsat.ll
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index f04ef6d329bce..771c2ca0a866c 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -24,11 +24,12 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 3, 3, 24
+; CHECK-NEXT: clrlwi 4, 3, 24
; CHECK-NEXT: addi 3, 3, 42
-; CHECK-NEXT: andi. 4, 3, 256
+; CHECK-NEXT: clrlwi 5, 3, 24
+; CHECK-NEXT: cmplw 4, 5
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iseleq 3, 3, 4
+; CHECK-NEXT: iselgt 3, 4, 3
; CHECK-NEXT: blr
%a = add i8 %x, 42
%c = icmp ugt i8 %x, %a
@@ -69,11 +70,12 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 3, 3, 16
+; CHECK-NEXT: clrlwi 4, 3, 16
; CHECK-NEXT: addi 3, 3, 42
-; CHECK-NEXT: andis. 4, 3, 1
+; CHECK-NEXT: clrlwi 5, 3, 16
+; CHECK-NEXT: cmplw 4, 5
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iseleq 3, 3, 4
+; CHECK-NEXT: iselgt 3, 4, 3
; CHECK-NEXT: blr
%a = add i16 %x, 42
%c = icmp ugt i16 %x, %a
@@ -114,9 +116,9 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; CHECK: # %bb.0:
; CHECK-NEXT: addi 4, 3, 42
-; CHECK-NEXT: cmplw 4, 3
+; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: isellt 3, 3, 4
+; CHECK-NEXT: iselgt 3, 3, 4
; CHECK-NEXT: blr
%a = add i32 %x, 42
%c = icmp ugt i32 %x, %a
@@ -203,12 +205,12 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 4, 4, 24
-; CHECK-NEXT: clrlwi 3, 3, 24
+; CHECK-NEXT: clrlwi 5, 3, 24
; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: andi. 4, 3, 256
+; CHECK-NEXT: clrlwi 4, 3, 24
+; CHECK-NEXT: cmplw 5, 4
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iseleq 3, 3, 4
+; CHECK-NEXT: iselgt 3, 4, 3
; CHECK-NEXT: blr
%a = add i8 %x, %y
%c = icmp ugt i8 %x, %a
@@ -254,12 +256,12 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 4, 4, 16
-; CHECK-NEXT: clrlwi 3, 3, 16
+; CHECK-NEXT: clrlwi 5, 3, 16
; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: andis. 4, 3, 1
+; CHECK-NEXT: clrlwi 4, 3, 16
+; CHECK-NEXT: cmplw 5, 4
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iseleq 3, 3, 4
+; CHECK-NEXT: iselgt 3, 4, 3
; CHECK-NEXT: blr
%a = add i16 %x, %y
%c = icmp ugt i16 %x, %a
@@ -304,9 +306,9 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; CHECK: # %bb.0:
; CHECK-NEXT: add 4, 3, 4
-; CHECK-NEXT: cmplw 4, 3
+; CHECK-NEXT: cmplw 3, 4
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: isellt 3, 3, 4
+; CHECK-NEXT: iselgt 3, 3, 4
; CHECK-NEXT: blr
%a = add i32 %x, %y
%c = icmp ugt i32 %x, %a
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.s b/llvm/test/CodeGen/PowerPC/sat-add.s
deleted file mode 100644
index ca085fc0f6997..0000000000000
--- a/llvm/test/CodeGen/PowerPC/sat-add.s
+++ /dev/null
@@ -1,1260 +0,0 @@
- .abiversion 2
- .file "sat-add.ll"
- .text
- .globl unsigned_sat_constant_i8_using_min # -- Begin function unsigned_sat_constant_i8_using_min
- .p2align 4
- .type unsigned_sat_constant_i8_using_min,@function
-unsigned_sat_constant_i8_using_min: # @unsigned_sat_constant_i8_using_min
-.Lfunc_begin0:
- .cfi_startproc
-# %bb.0:
- clrlwi 4, 3, 24
- cmplwi 4, 213
- li 4, -43
- isellt 3, 3, 4
- addi 3, 3, 42
- blr
- .long 0
- .quad 0
-.Lfunc_end0:
- .size unsigned_sat_constant_i8_using_min, .Lfunc_end0-.Lfunc_begin0
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i8_using_cmp_sum # -- Begin function unsigned_sat_constant_i8_using_cmp_sum
- .p2align 4
- .type unsigned_sat_constant_i8_using_cmp_sum,@function
-unsigned_sat_constant_i8_using_cmp_sum: # @unsigned_sat_constant_i8_using_cmp_sum
-.Lfunc_begin1:
- .cfi_startproc
-# %bb.0:
- clrlwi 3, 3, 24
- addi 3, 3, 42
- andi. 4, 3, 256
- li 4, -1
- iseleq 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end1:
- .size unsigned_sat_constant_i8_using_cmp_sum, .Lfunc_end1-.Lfunc_begin1
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i8_using_cmp_notval # -- Begin function unsigned_sat_constant_i8_using_cmp_notval
- .p2align 4
- .type unsigned_sat_constant_i8_using_cmp_notval, at function
-unsigned_sat_constant_i8_using_cmp_notval: # @unsigned_sat_constant_i8_using_cmp_notval
-.Lfunc_begin2:
- .cfi_startproc
-# %bb.0:
- clrlwi 4, 3, 24
- addi 3, 3, 42
- cmplwi 4, 213
- li 4, -1
- iselgt 3, 4, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end2:
- .size unsigned_sat_constant_i8_using_cmp_notval, .Lfunc_end2-.Lfunc_begin2
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i16_using_min # -- Begin function unsigned_sat_constant_i16_using_min
- .p2align 4
- .type unsigned_sat_constant_i16_using_min, at function
-unsigned_sat_constant_i16_using_min: # @unsigned_sat_constant_i16_using_min
-.Lfunc_begin3:
- .cfi_startproc
-# %bb.0:
- clrlwi 4, 3, 16
- cmplwi 4, 65493
- li 4, -43
- isellt 3, 3, 4
- addi 3, 3, 42
- blr
- .long 0
- .quad 0
-.Lfunc_end3:
- .size unsigned_sat_constant_i16_using_min, .Lfunc_end3-.Lfunc_begin3
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i16_using_cmp_sum # -- Begin function unsigned_sat_constant_i16_using_cmp_sum
- .p2align 4
- .type unsigned_sat_constant_i16_using_cmp_sum, at function
-unsigned_sat_constant_i16_using_cmp_sum: # @unsigned_sat_constant_i16_using_cmp_sum
-.Lfunc_begin4:
- .cfi_startproc
-# %bb.0:
- clrlwi 3, 3, 16
- addi 3, 3, 42
- andis. 4, 3, 1
- li 4, -1
- iseleq 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end4:
- .size unsigned_sat_constant_i16_using_cmp_sum, .Lfunc_end4-.Lfunc_begin4
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i16_using_cmp_notval # -- Begin function unsigned_sat_constant_i16_using_cmp_notval
- .p2align 4
- .type unsigned_sat_constant_i16_using_cmp_notval, at function
-unsigned_sat_constant_i16_using_cmp_notval: # @unsigned_sat_constant_i16_using_cmp_notval
-.Lfunc_begin5:
- .cfi_startproc
-# %bb.0:
- clrlwi 4, 3, 16
- addi 3, 3, 42
- cmplwi 4, 65493
- li 4, -1
- iselgt 3, 4, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end5:
- .size unsigned_sat_constant_i16_using_cmp_notval, .Lfunc_end5-.Lfunc_begin5
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i32_using_min # -- Begin function unsigned_sat_constant_i32_using_min
- .p2align 4
- .type unsigned_sat_constant_i32_using_min, at function
-unsigned_sat_constant_i32_using_min: # @unsigned_sat_constant_i32_using_min
-.Lfunc_begin6:
- .cfi_startproc
-# %bb.0:
- li 4, -43
- cmplw 3, 4
- isellt 3, 3, 4
- addi 3, 3, 42
- blr
- .long 0
- .quad 0
-.Lfunc_end6:
- .size unsigned_sat_constant_i32_using_min, .Lfunc_end6-.Lfunc_begin6
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i32_using_cmp_sum # -- Begin function unsigned_sat_constant_i32_using_cmp_sum
- .p2align 4
- .type unsigned_sat_constant_i32_using_cmp_sum, at function
-unsigned_sat_constant_i32_using_cmp_sum: # @unsigned_sat_constant_i32_using_cmp_sum
-.Lfunc_begin7:
- .cfi_startproc
-# %bb.0:
- addi 4, 3, 42
- cmplw 4, 3
- li 3, -1
- isellt 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end7:
- .size unsigned_sat_constant_i32_using_cmp_sum, .Lfunc_end7-.Lfunc_begin7
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i32_using_cmp_notval # -- Begin function unsigned_sat_constant_i32_using_cmp_notval
- .p2align 4
- .type unsigned_sat_constant_i32_using_cmp_notval, at function
-unsigned_sat_constant_i32_using_cmp_notval: # @unsigned_sat_constant_i32_using_cmp_notval
-.Lfunc_begin8:
- .cfi_startproc
-# %bb.0:
- li 5, -43
- addi 4, 3, 42
- cmplw 3, 5
- li 3, -1
- iselgt 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end8:
- .size unsigned_sat_constant_i32_using_cmp_notval, .Lfunc_end8-.Lfunc_begin8
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i64_using_min # -- Begin function unsigned_sat_constant_i64_using_min
- .p2align 4
- .type unsigned_sat_constant_i64_using_min, at function
-unsigned_sat_constant_i64_using_min: # @unsigned_sat_constant_i64_using_min
-.Lfunc_begin9:
- .cfi_startproc
-# %bb.0:
- li 4, -43
- cmpld 3, 4
- isellt 3, 3, 4
- addi 3, 3, 42
- blr
- .long 0
- .quad 0
-.Lfunc_end9:
- .size unsigned_sat_constant_i64_using_min, .Lfunc_end9-.Lfunc_begin9
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i64_using_cmp_sum # -- Begin function unsigned_sat_constant_i64_using_cmp_sum
- .p2align 4
- .type unsigned_sat_constant_i64_using_cmp_sum, at function
-unsigned_sat_constant_i64_using_cmp_sum: # @unsigned_sat_constant_i64_using_cmp_sum
-.Lfunc_begin10:
- .cfi_startproc
-# %bb.0:
- li 4, 0
- addic 3, 3, 42
- addze. 4, 4
- li 4, -1
- iseleq 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end10:
- .size unsigned_sat_constant_i64_using_cmp_sum, .Lfunc_end10-.Lfunc_begin10
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i64_using_cmp_notval # -- Begin function unsigned_sat_constant_i64_using_cmp_notval
- .p2align 4
- .type unsigned_sat_constant_i64_using_cmp_notval, at function
-unsigned_sat_constant_i64_using_cmp_notval: # @unsigned_sat_constant_i64_using_cmp_notval
-.Lfunc_begin11:
- .cfi_startproc
-# %bb.0:
- li 4, 0
- addic 3, 3, 42
- addze. 4, 4
- li 4, -1
- iseleq 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end11:
- .size unsigned_sat_constant_i64_using_cmp_notval, .Lfunc_end11-.Lfunc_begin11
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i8_using_min # -- Begin function unsigned_sat_variable_i8_using_min
- .p2align 4
- .type unsigned_sat_variable_i8_using_min, at function
-unsigned_sat_variable_i8_using_min: # @unsigned_sat_variable_i8_using_min
-.Lfunc_begin12:
- .cfi_startproc
-# %bb.0:
- not 6, 4
- clrlwi 5, 3, 24
- clrlwi 7, 6, 24
- cmplw 5, 7
- isellt 3, 3, 6
- add 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end12:
- .size unsigned_sat_variable_i8_using_min, .Lfunc_end12-.Lfunc_begin12
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i8_using_cmp_sum # -- Begin function unsigned_sat_variable_i8_using_cmp_sum
- .p2align 4
- .type unsigned_sat_variable_i8_using_cmp_sum, at function
-unsigned_sat_variable_i8_using_cmp_sum: # @unsigned_sat_variable_i8_using_cmp_sum
-.Lfunc_begin13:
- .cfi_startproc
-# %bb.0:
- clrlwi 4, 4, 24
- clrlwi 3, 3, 24
- add 3, 3, 4
- andi. 4, 3, 256
- li 4, -1
- iseleq 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end13:
- .size unsigned_sat_variable_i8_using_cmp_sum, .Lfunc_end13-.Lfunc_begin13
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i8_using_cmp_notval # -- Begin function unsigned_sat_variable_i8_using_cmp_notval
- .p2align 4
- .type unsigned_sat_variable_i8_using_cmp_notval, at function
-unsigned_sat_variable_i8_using_cmp_notval: # @unsigned_sat_variable_i8_using_cmp_notval
-.Lfunc_begin14:
- .cfi_startproc
-# %bb.0:
- not 6, 4
- clrlwi 5, 3, 24
- add 3, 3, 4
- li 4, -1
- clrlwi 6, 6, 24
- cmplw 5, 6
- iselgt 3, 4, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end14:
- .size unsigned_sat_variable_i8_using_cmp_notval, .Lfunc_end14-.Lfunc_begin14
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i16_using_min # -- Begin function unsigned_sat_variable_i16_using_min
- .p2align 4
- .type unsigned_sat_variable_i16_using_min, at function
-unsigned_sat_variable_i16_using_min: # @unsigned_sat_variable_i16_using_min
-.Lfunc_begin15:
- .cfi_startproc
-# %bb.0:
- not 6, 4
- clrlwi 5, 3, 16
- clrlwi 7, 6, 16
- cmplw 5, 7
- isellt 3, 3, 6
- add 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end15:
- .size unsigned_sat_variable_i16_using_min, .Lfunc_end15-.Lfunc_begin15
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i16_using_cmp_sum # -- Begin function unsigned_sat_variable_i16_using_cmp_sum
- .p2align 4
- .type unsigned_sat_variable_i16_using_cmp_sum, at function
-unsigned_sat_variable_i16_using_cmp_sum: # @unsigned_sat_variable_i16_using_cmp_sum
-.Lfunc_begin16:
- .cfi_startproc
-# %bb.0:
- clrlwi 4, 4, 16
- clrlwi 3, 3, 16
- add 3, 3, 4
- andis. 4, 3, 1
- li 4, -1
- iseleq 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end16:
- .size unsigned_sat_variable_i16_using_cmp_sum, .Lfunc_end16-.Lfunc_begin16
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i16_using_cmp_notval # -- Begin function unsigned_sat_variable_i16_using_cmp_notval
- .p2align 4
- .type unsigned_sat_variable_i16_using_cmp_notval, at function
-unsigned_sat_variable_i16_using_cmp_notval: # @unsigned_sat_variable_i16_using_cmp_notval
-.Lfunc_begin17:
- .cfi_startproc
-# %bb.0:
- not 6, 4
- clrlwi 5, 3, 16
- add 3, 3, 4
- li 4, -1
- clrlwi 6, 6, 16
- cmplw 5, 6
- iselgt 3, 4, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end17:
- .size unsigned_sat_variable_i16_using_cmp_notval, .Lfunc_end17-.Lfunc_begin17
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i32_using_min # -- Begin function unsigned_sat_variable_i32_using_min
- .p2align 4
- .type unsigned_sat_variable_i32_using_min, at function
-unsigned_sat_variable_i32_using_min: # @unsigned_sat_variable_i32_using_min
-.Lfunc_begin18:
- .cfi_startproc
-# %bb.0:
- not 5, 4
- cmplw 3, 5
- isellt 3, 3, 5
- add 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end18:
- .size unsigned_sat_variable_i32_using_min, .Lfunc_end18-.Lfunc_begin18
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i32_using_cmp_sum # -- Begin function unsigned_sat_variable_i32_using_cmp_sum
- .p2align 4
- .type unsigned_sat_variable_i32_using_cmp_sum, at function
-unsigned_sat_variable_i32_using_cmp_sum: # @unsigned_sat_variable_i32_using_cmp_sum
-.Lfunc_begin19:
- .cfi_startproc
-# %bb.0:
- add 4, 3, 4
- cmplw 4, 3
- li 3, -1
- isellt 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end19:
- .size unsigned_sat_variable_i32_using_cmp_sum, .Lfunc_end19-.Lfunc_begin19
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i32_using_cmp_notval # -- Begin function unsigned_sat_variable_i32_using_cmp_notval
- .p2align 4
- .type unsigned_sat_variable_i32_using_cmp_notval, at function
-unsigned_sat_variable_i32_using_cmp_notval: # @unsigned_sat_variable_i32_using_cmp_notval
-.Lfunc_begin20:
- .cfi_startproc
-# %bb.0:
- not 5, 4
- add 4, 3, 4
- cmplw 3, 5
- li 3, -1
- iselgt 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end20:
- .size unsigned_sat_variable_i32_using_cmp_notval, .Lfunc_end20-.Lfunc_begin20
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i64_using_min # -- Begin function unsigned_sat_variable_i64_using_min
- .p2align 4
- .type unsigned_sat_variable_i64_using_min, at function
-unsigned_sat_variable_i64_using_min: # @unsigned_sat_variable_i64_using_min
-.Lfunc_begin21:
- .cfi_startproc
-# %bb.0:
- not 5, 4
- cmpld 3, 5
- isellt 3, 3, 5
- add 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end21:
- .size unsigned_sat_variable_i64_using_min, .Lfunc_end21-.Lfunc_begin21
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i64_using_cmp_sum # -- Begin function unsigned_sat_variable_i64_using_cmp_sum
- .p2align 4
- .type unsigned_sat_variable_i64_using_cmp_sum, at function
-unsigned_sat_variable_i64_using_cmp_sum: # @unsigned_sat_variable_i64_using_cmp_sum
-.Lfunc_begin22:
- .cfi_startproc
-# %bb.0:
- addc 3, 3, 4
- li 4, 0
- addze. 4, 4
- li 4, -1
- iseleq 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end22:
- .size unsigned_sat_variable_i64_using_cmp_sum, .Lfunc_end22-.Lfunc_begin22
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_i64_using_cmp_notval # -- Begin function unsigned_sat_variable_i64_using_cmp_notval
- .p2align 4
- .type unsigned_sat_variable_i64_using_cmp_notval, at function
-unsigned_sat_variable_i64_using_cmp_notval: # @unsigned_sat_variable_i64_using_cmp_notval
-.Lfunc_begin23:
- .cfi_startproc
-# %bb.0:
- not 5, 4
- add 4, 3, 4
- cmpld 3, 5
- li 3, -1
- iselgt 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end23:
- .size unsigned_sat_variable_i64_using_cmp_notval, .Lfunc_end23-.Lfunc_begin23
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v16i8_using_min
-.LCPI24_0:
- .space 16,213
-.LCPI24_1:
- .space 16,42
- .text
- .globl unsigned_sat_constant_v16i8_using_min
- .p2align 4
- .type unsigned_sat_constant_v16i8_using_min, at function
-unsigned_sat_constant_v16i8_using_min: # @unsigned_sat_constant_v16i8_using_min
-.Lfunc_begin24:
- .cfi_startproc
-.Lfunc_gep24:
- addis 2, 12, .TOC.-.Lfunc_gep24 at ha
- addi 2, 2, .TOC.-.Lfunc_gep24 at l
-.Lfunc_lep24:
- .localentry unsigned_sat_constant_v16i8_using_min, .Lfunc_lep24-.Lfunc_gep24
-# %bb.0:
- addis 3, 2, .LCPI24_0 at toc@ha
- addi 3, 3, .LCPI24_0 at toc@l
- lxvd2x 35, 0, 3
- addis 3, 2, .LCPI24_1 at toc@ha
- addi 3, 3, .LCPI24_1 at toc@l
- vminub 2, 2, 3
- lxvd2x 35, 0, 3
- vaddubm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end24:
- .size unsigned_sat_constant_v16i8_using_min, .Lfunc_end24-.Lfunc_begin24
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v16i8_using_cmp_sum
-.LCPI25_0:
- .space 16,42
- .text
- .globl unsigned_sat_constant_v16i8_using_cmp_sum
- .p2align 4
- .type unsigned_sat_constant_v16i8_using_cmp_sum, at function
-unsigned_sat_constant_v16i8_using_cmp_sum: # @unsigned_sat_constant_v16i8_using_cmp_sum
-.Lfunc_begin25:
- .cfi_startproc
-.Lfunc_gep25:
- addis 2, 12, .TOC.-.Lfunc_gep25 at ha
- addi 2, 2, .TOC.-.Lfunc_gep25 at l
-.Lfunc_lep25:
- .localentry unsigned_sat_constant_v16i8_using_cmp_sum, .Lfunc_lep25-.Lfunc_gep25
-# %bb.0:
- addis 3, 2, .LCPI25_0 at toc@ha
- addi 3, 3, .LCPI25_0 at toc@l
- lxvd2x 35, 0, 3
- vaddubs 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end25:
- .size unsigned_sat_constant_v16i8_using_cmp_sum, .Lfunc_end25-.Lfunc_begin25
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v16i8_using_cmp_notval
-.LCPI26_0:
- .space 16,42
- .text
- .globl unsigned_sat_constant_v16i8_using_cmp_notval
- .p2align 4
- .type unsigned_sat_constant_v16i8_using_cmp_notval, at function
-unsigned_sat_constant_v16i8_using_cmp_notval: # @unsigned_sat_constant_v16i8_using_cmp_notval
-.Lfunc_begin26:
- .cfi_startproc
-.Lfunc_gep26:
- addis 2, 12, .TOC.-.Lfunc_gep26 at ha
- addi 2, 2, .TOC.-.Lfunc_gep26 at l
-.Lfunc_lep26:
- .localentry unsigned_sat_constant_v16i8_using_cmp_notval, .Lfunc_lep26-.Lfunc_gep26
-# %bb.0:
- addis 3, 2, .LCPI26_0 at toc@ha
- addi 3, 3, .LCPI26_0 at toc@l
- lxvd2x 35, 0, 3
- vaddubs 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end26:
- .size unsigned_sat_constant_v16i8_using_cmp_notval, .Lfunc_end26-.Lfunc_begin26
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v8i16_using_min
-.LCPI27_0:
- .short 65493 # 0xffd5
- .short 65493 # 0xffd5
- .short 65493 # 0xffd5
- .short 65493 # 0xffd5
- .short 65493 # 0xffd5
- .short 65493 # 0xffd5
- .short 65493 # 0xffd5
- .short 65493 # 0xffd5
-.LCPI27_1:
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .text
- .globl unsigned_sat_constant_v8i16_using_min
- .p2align 4
- .type unsigned_sat_constant_v8i16_using_min, at function
-unsigned_sat_constant_v8i16_using_min: # @unsigned_sat_constant_v8i16_using_min
-.Lfunc_begin27:
- .cfi_startproc
-.Lfunc_gep27:
- addis 2, 12, .TOC.-.Lfunc_gep27 at ha
- addi 2, 2, .TOC.-.Lfunc_gep27 at l
-.Lfunc_lep27:
- .localentry unsigned_sat_constant_v8i16_using_min, .Lfunc_lep27-.Lfunc_gep27
-# %bb.0:
- addis 3, 2, .LCPI27_0 at toc@ha
- addi 3, 3, .LCPI27_0 at toc@l
- lxvd2x 35, 0, 3
- addis 3, 2, .LCPI27_1 at toc@ha
- addi 3, 3, .LCPI27_1 at toc@l
- vminuh 2, 2, 3
- lxvd2x 35, 0, 3
- vadduhm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end27:
- .size unsigned_sat_constant_v8i16_using_min, .Lfunc_end27-.Lfunc_begin27
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v8i16_using_cmp_sum
-.LCPI28_0:
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .text
- .globl unsigned_sat_constant_v8i16_using_cmp_sum
- .p2align 4
- .type unsigned_sat_constant_v8i16_using_cmp_sum, at function
-unsigned_sat_constant_v8i16_using_cmp_sum: # @unsigned_sat_constant_v8i16_using_cmp_sum
-.Lfunc_begin28:
- .cfi_startproc
-.Lfunc_gep28:
- addis 2, 12, .TOC.-.Lfunc_gep28 at ha
- addi 2, 2, .TOC.-.Lfunc_gep28 at l
-.Lfunc_lep28:
- .localentry unsigned_sat_constant_v8i16_using_cmp_sum, .Lfunc_lep28-.Lfunc_gep28
-# %bb.0:
- addis 3, 2, .LCPI28_0 at toc@ha
- addi 3, 3, .LCPI28_0 at toc@l
- lxvd2x 35, 0, 3
- vadduhs 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end28:
- .size unsigned_sat_constant_v8i16_using_cmp_sum, .Lfunc_end28-.Lfunc_begin28
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v8i16_using_cmp_notval
-.LCPI29_0:
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .short 42 # 0x2a
- .text
- .globl unsigned_sat_constant_v8i16_using_cmp_notval
- .p2align 4
- .type unsigned_sat_constant_v8i16_using_cmp_notval, at function
-unsigned_sat_constant_v8i16_using_cmp_notval: # @unsigned_sat_constant_v8i16_using_cmp_notval
-.Lfunc_begin29:
- .cfi_startproc
-.Lfunc_gep29:
- addis 2, 12, .TOC.-.Lfunc_gep29 at ha
- addi 2, 2, .TOC.-.Lfunc_gep29 at l
-.Lfunc_lep29:
- .localentry unsigned_sat_constant_v8i16_using_cmp_notval, .Lfunc_lep29-.Lfunc_gep29
-# %bb.0:
- addis 3, 2, .LCPI29_0 at toc@ha
- addi 3, 3, .LCPI29_0 at toc@l
- lxvd2x 35, 0, 3
- vadduhs 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end29:
- .size unsigned_sat_constant_v8i16_using_cmp_notval, .Lfunc_end29-.Lfunc_begin29
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v4i32_using_min
-.LCPI30_0:
- .long 4294967253 # 0xffffffd5
- .long 4294967253 # 0xffffffd5
- .long 4294967253 # 0xffffffd5
- .long 4294967253 # 0xffffffd5
-.LCPI30_1:
- .long 42 # 0x2a
- .long 42 # 0x2a
- .long 42 # 0x2a
- .long 42 # 0x2a
- .text
- .globl unsigned_sat_constant_v4i32_using_min
- .p2align 4
- .type unsigned_sat_constant_v4i32_using_min, at function
-unsigned_sat_constant_v4i32_using_min: # @unsigned_sat_constant_v4i32_using_min
-.Lfunc_begin30:
- .cfi_startproc
-.Lfunc_gep30:
- addis 2, 12, .TOC.-.Lfunc_gep30 at ha
- addi 2, 2, .TOC.-.Lfunc_gep30 at l
-.Lfunc_lep30:
- .localentry unsigned_sat_constant_v4i32_using_min, .Lfunc_lep30-.Lfunc_gep30
-# %bb.0:
- addis 3, 2, .LCPI30_0 at toc@ha
- addi 3, 3, .LCPI30_0 at toc@l
- lxvd2x 35, 0, 3
- addis 3, 2, .LCPI30_1 at toc@ha
- addi 3, 3, .LCPI30_1 at toc@l
- vminuw 2, 2, 3
- lxvd2x 35, 0, 3
- vadduwm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end30:
- .size unsigned_sat_constant_v4i32_using_min, .Lfunc_end30-.Lfunc_begin30
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v4i32_using_cmp_sum
-.LCPI31_0:
- .long 42 # 0x2a
- .long 42 # 0x2a
- .long 42 # 0x2a
- .long 42 # 0x2a
- .text
- .globl unsigned_sat_constant_v4i32_using_cmp_sum
- .p2align 4
- .type unsigned_sat_constant_v4i32_using_cmp_sum, at function
-unsigned_sat_constant_v4i32_using_cmp_sum: # @unsigned_sat_constant_v4i32_using_cmp_sum
-.Lfunc_begin31:
- .cfi_startproc
-.Lfunc_gep31:
- addis 2, 12, .TOC.-.Lfunc_gep31 at ha
- addi 2, 2, .TOC.-.Lfunc_gep31 at l
-.Lfunc_lep31:
- .localentry unsigned_sat_constant_v4i32_using_cmp_sum, .Lfunc_lep31-.Lfunc_gep31
-# %bb.0:
- addis 3, 2, .LCPI31_0 at toc@ha
- addi 3, 3, .LCPI31_0 at toc@l
- lxvd2x 35, 0, 3
- vadduws 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end31:
- .size unsigned_sat_constant_v4i32_using_cmp_sum, .Lfunc_end31-.Lfunc_begin31
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v4i32_using_cmp_notval
-.LCPI32_0:
- .long 42 # 0x2a
- .long 42 # 0x2a
- .long 42 # 0x2a
- .long 42 # 0x2a
- .text
- .globl unsigned_sat_constant_v4i32_using_cmp_notval
- .p2align 4
- .type unsigned_sat_constant_v4i32_using_cmp_notval, at function
-unsigned_sat_constant_v4i32_using_cmp_notval: # @unsigned_sat_constant_v4i32_using_cmp_notval
-.Lfunc_begin32:
- .cfi_startproc
-.Lfunc_gep32:
- addis 2, 12, .TOC.-.Lfunc_gep32 at ha
- addi 2, 2, .TOC.-.Lfunc_gep32 at l
-.Lfunc_lep32:
- .localentry unsigned_sat_constant_v4i32_using_cmp_notval, .Lfunc_lep32-.Lfunc_gep32
-# %bb.0:
- addis 3, 2, .LCPI32_0 at toc@ha
- addi 3, 3, .LCPI32_0 at toc@l
- lxvd2x 35, 0, 3
- vadduws 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end32:
- .size unsigned_sat_constant_v4i32_using_cmp_notval, .Lfunc_end32-.Lfunc_begin32
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v2i64_using_min
-.LCPI33_0:
- .quad -43 # 0xffffffffffffffd5
- .quad -43 # 0xffffffffffffffd5
-.LCPI33_1:
- .quad 42 # 0x2a
- .quad 42 # 0x2a
- .text
- .globl unsigned_sat_constant_v2i64_using_min
- .p2align 4
- .type unsigned_sat_constant_v2i64_using_min, at function
-unsigned_sat_constant_v2i64_using_min: # @unsigned_sat_constant_v2i64_using_min
-.Lfunc_begin33:
- .cfi_startproc
-.Lfunc_gep33:
- addis 2, 12, .TOC.-.Lfunc_gep33 at ha
- addi 2, 2, .TOC.-.Lfunc_gep33 at l
-.Lfunc_lep33:
- .localentry unsigned_sat_constant_v2i64_using_min, .Lfunc_lep33-.Lfunc_gep33
-# %bb.0:
- addis 3, 2, .LCPI33_0 at toc@ha
- addi 3, 3, .LCPI33_0 at toc@l
- lxvd2x 35, 0, 3
- addis 3, 2, .LCPI33_1 at toc@ha
- addi 3, 3, .LCPI33_1 at toc@l
- vminud 2, 2, 3
- lxvd2x 35, 0, 3
- vaddudm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end33:
- .size unsigned_sat_constant_v2i64_using_min, .Lfunc_end33-.Lfunc_begin33
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v2i64_using_cmp_sum
-.LCPI34_0:
- .quad 42 # 0x2a
- .quad 42 # 0x2a
-.LCPI34_1:
- .quad -43 # 0xffffffffffffffd5
- .quad -43 # 0xffffffffffffffd5
- .text
- .globl unsigned_sat_constant_v2i64_using_cmp_sum
- .p2align 4
- .type unsigned_sat_constant_v2i64_using_cmp_sum, at function
-unsigned_sat_constant_v2i64_using_cmp_sum: # @unsigned_sat_constant_v2i64_using_cmp_sum
-.Lfunc_begin34:
- .cfi_startproc
-.Lfunc_gep34:
- addis 2, 12, .TOC.-.Lfunc_gep34 at ha
- addi 2, 2, .TOC.-.Lfunc_gep34 at l
-.Lfunc_lep34:
- .localentry unsigned_sat_constant_v2i64_using_cmp_sum, .Lfunc_lep34-.Lfunc_gep34
-# %bb.0:
- addis 3, 2, .LCPI34_0 at toc@ha
- addi 3, 3, .LCPI34_0 at toc@l
- lxvd2x 35, 0, 3
- addis 3, 2, .LCPI34_1 at toc@ha
- addi 3, 3, .LCPI34_1 at toc@l
- lxvd2x 36, 0, 3
- vminud 2, 2, 4
- vaddudm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end34:
- .size unsigned_sat_constant_v2i64_using_cmp_sum, .Lfunc_end34-.Lfunc_begin34
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function unsigned_sat_constant_v2i64_using_cmp_notval
-.LCPI35_0:
- .quad 42 # 0x2a
- .quad 42 # 0x2a
-.LCPI35_1:
- .quad -43 # 0xffffffffffffffd5
- .quad -43 # 0xffffffffffffffd5
- .text
- .globl unsigned_sat_constant_v2i64_using_cmp_notval
- .p2align 4
- .type unsigned_sat_constant_v2i64_using_cmp_notval, at function
-unsigned_sat_constant_v2i64_using_cmp_notval: # @unsigned_sat_constant_v2i64_using_cmp_notval
-.Lfunc_begin35:
- .cfi_startproc
-.Lfunc_gep35:
- addis 2, 12, .TOC.-.Lfunc_gep35 at ha
- addi 2, 2, .TOC.-.Lfunc_gep35 at l
-.Lfunc_lep35:
- .localentry unsigned_sat_constant_v2i64_using_cmp_notval, .Lfunc_lep35-.Lfunc_gep35
-# %bb.0:
- addis 3, 2, .LCPI35_0 at toc@ha
- addi 3, 3, .LCPI35_0 at toc@l
- lxvd2x 35, 0, 3
- addis 3, 2, .LCPI35_1 at toc@ha
- addi 3, 3, .LCPI35_1 at toc@l
- lxvd2x 36, 0, 3
- vaddudm 3, 2, 3
- vcmpgtud 2, 2, 4
- xxlor 34, 34, 35
- blr
- .long 0
- .quad 0
-.Lfunc_end35:
- .size unsigned_sat_constant_v2i64_using_cmp_notval, .Lfunc_end35-.Lfunc_begin35
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v16i8_using_min # -- Begin function unsigned_sat_variable_v16i8_using_min
- .p2align 4
- .type unsigned_sat_variable_v16i8_using_min, at function
-unsigned_sat_variable_v16i8_using_min: # @unsigned_sat_variable_v16i8_using_min
-.Lfunc_begin36:
- .cfi_startproc
-# %bb.0:
- xxlnor 36, 35, 35
- vminub 2, 2, 4
- vaddubm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end36:
- .size unsigned_sat_variable_v16i8_using_min, .Lfunc_end36-.Lfunc_begin36
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v16i8_using_cmp_sum # -- Begin function unsigned_sat_variable_v16i8_using_cmp_sum
- .p2align 4
- .type unsigned_sat_variable_v16i8_using_cmp_sum, at function
-unsigned_sat_variable_v16i8_using_cmp_sum: # @unsigned_sat_variable_v16i8_using_cmp_sum
-.Lfunc_begin37:
- .cfi_startproc
-# %bb.0:
- vaddubs 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end37:
- .size unsigned_sat_variable_v16i8_using_cmp_sum, .Lfunc_end37-.Lfunc_begin37
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v16i8_using_cmp_notval # -- Begin function unsigned_sat_variable_v16i8_using_cmp_notval
- .p2align 4
- .type unsigned_sat_variable_v16i8_using_cmp_notval, at function
-unsigned_sat_variable_v16i8_using_cmp_notval: # @unsigned_sat_variable_v16i8_using_cmp_notval
-.Lfunc_begin38:
- .cfi_startproc
-# %bb.0:
- xxlnor 36, 35, 35
- vaddubm 3, 2, 3
- vcmpgtub 2, 2, 4
- xxlor 34, 34, 35
- blr
- .long 0
- .quad 0
-.Lfunc_end38:
- .size unsigned_sat_variable_v16i8_using_cmp_notval, .Lfunc_end38-.Lfunc_begin38
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v8i16_using_min # -- Begin function unsigned_sat_variable_v8i16_using_min
- .p2align 4
- .type unsigned_sat_variable_v8i16_using_min, at function
-unsigned_sat_variable_v8i16_using_min: # @unsigned_sat_variable_v8i16_using_min
-.Lfunc_begin39:
- .cfi_startproc
-# %bb.0:
- xxlnor 36, 35, 35
- vminuh 2, 2, 4
- vadduhm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end39:
- .size unsigned_sat_variable_v8i16_using_min, .Lfunc_end39-.Lfunc_begin39
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v8i16_using_cmp_sum # -- Begin function unsigned_sat_variable_v8i16_using_cmp_sum
- .p2align 4
- .type unsigned_sat_variable_v8i16_using_cmp_sum, at function
-unsigned_sat_variable_v8i16_using_cmp_sum: # @unsigned_sat_variable_v8i16_using_cmp_sum
-.Lfunc_begin40:
- .cfi_startproc
-# %bb.0:
- vadduhs 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end40:
- .size unsigned_sat_variable_v8i16_using_cmp_sum, .Lfunc_end40-.Lfunc_begin40
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v8i16_using_cmp_notval # -- Begin function unsigned_sat_variable_v8i16_using_cmp_notval
- .p2align 4
- .type unsigned_sat_variable_v8i16_using_cmp_notval, at function
-unsigned_sat_variable_v8i16_using_cmp_notval: # @unsigned_sat_variable_v8i16_using_cmp_notval
-.Lfunc_begin41:
- .cfi_startproc
-# %bb.0:
- xxlnor 36, 35, 35
- vadduhm 3, 2, 3
- vcmpgtuh 2, 2, 4
- xxlor 34, 34, 35
- blr
- .long 0
- .quad 0
-.Lfunc_end41:
- .size unsigned_sat_variable_v8i16_using_cmp_notval, .Lfunc_end41-.Lfunc_begin41
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v4i32_using_min # -- Begin function unsigned_sat_variable_v4i32_using_min
- .p2align 4
- .type unsigned_sat_variable_v4i32_using_min, at function
-unsigned_sat_variable_v4i32_using_min: # @unsigned_sat_variable_v4i32_using_min
-.Lfunc_begin42:
- .cfi_startproc
-# %bb.0:
- xxlnor 36, 35, 35
- vminuw 2, 2, 4
- vadduwm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end42:
- .size unsigned_sat_variable_v4i32_using_min, .Lfunc_end42-.Lfunc_begin42
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v4i32_using_cmp_sum # -- Begin function unsigned_sat_variable_v4i32_using_cmp_sum
- .p2align 4
- .type unsigned_sat_variable_v4i32_using_cmp_sum, at function
-unsigned_sat_variable_v4i32_using_cmp_sum: # @unsigned_sat_variable_v4i32_using_cmp_sum
-.Lfunc_begin43:
- .cfi_startproc
-# %bb.0:
- vadduws 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end43:
- .size unsigned_sat_variable_v4i32_using_cmp_sum, .Lfunc_end43-.Lfunc_begin43
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v4i32_using_cmp_notval # -- Begin function unsigned_sat_variable_v4i32_using_cmp_notval
- .p2align 4
- .type unsigned_sat_variable_v4i32_using_cmp_notval, at function
-unsigned_sat_variable_v4i32_using_cmp_notval: # @unsigned_sat_variable_v4i32_using_cmp_notval
-.Lfunc_begin44:
- .cfi_startproc
-# %bb.0:
- xxlnor 36, 35, 35
- vadduwm 3, 2, 3
- vcmpgtuw 2, 2, 4
- xxlor 34, 34, 35
- blr
- .long 0
- .quad 0
-.Lfunc_end44:
- .size unsigned_sat_variable_v4i32_using_cmp_notval, .Lfunc_end44-.Lfunc_begin44
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v2i64_using_min # -- Begin function unsigned_sat_variable_v2i64_using_min
- .p2align 4
- .type unsigned_sat_variable_v2i64_using_min, at function
-unsigned_sat_variable_v2i64_using_min: # @unsigned_sat_variable_v2i64_using_min
-.Lfunc_begin45:
- .cfi_startproc
-# %bb.0:
- xxlnor 36, 35, 35
- vminud 2, 2, 4
- vaddudm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end45:
- .size unsigned_sat_variable_v2i64_using_min, .Lfunc_end45-.Lfunc_begin45
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v2i64_using_cmp_sum # -- Begin function unsigned_sat_variable_v2i64_using_cmp_sum
- .p2align 4
- .type unsigned_sat_variable_v2i64_using_cmp_sum, at function
-unsigned_sat_variable_v2i64_using_cmp_sum: # @unsigned_sat_variable_v2i64_using_cmp_sum
-.Lfunc_begin46:
- .cfi_startproc
-# %bb.0:
- xxlnor 36, 35, 35
- vminud 2, 2, 4
- vaddudm 2, 2, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end46:
- .size unsigned_sat_variable_v2i64_using_cmp_sum, .Lfunc_end46-.Lfunc_begin46
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_variable_v2i64_using_cmp_notval # -- Begin function unsigned_sat_variable_v2i64_using_cmp_notval
- .p2align 4
- .type unsigned_sat_variable_v2i64_using_cmp_notval, at function
-unsigned_sat_variable_v2i64_using_cmp_notval: # @unsigned_sat_variable_v2i64_using_cmp_notval
-.Lfunc_begin47:
- .cfi_startproc
-# %bb.0:
- xxlnor 36, 35, 35
- vaddudm 3, 2, 3
- vcmpgtud 2, 2, 4
- xxlor 34, 34, 35
- blr
- .long 0
- .quad 0
-.Lfunc_end47:
- .size unsigned_sat_variable_v2i64_using_cmp_notval, .Lfunc_end47-.Lfunc_begin47
- .cfi_endproc
- # -- End function
- .section .rodata.cst16,"aM", at progbits,16
- .p2align 4, 0x0 # -- Begin function sadd
-.LCPI48_0:
- .quad 0 # 0x0
- .quad -9223372036854775808 # 0x8000000000000000
- .text
- .globl sadd
- .p2align 4
- .type sadd, at function
-sadd: # @sadd
-.Lfunc_begin48:
- .cfi_startproc
-.Lfunc_gep48:
- addis 2, 12, .TOC.-.Lfunc_gep48 at ha
- addi 2, 2, .TOC.-.Lfunc_gep48 at l
-.Lfunc_lep48:
- .localentry sadd, .Lfunc_lep48-.Lfunc_gep48
-# %bb.0:
- vadduqm 0, 2, 6
- vadduqm 10, 4, 8
- mfocrf 12, 32
- stw 12, 8(1)
- xxswapd 0, 34
- xxswapd 4, 36
- vadduqm 1, 3, 7
- vadduqm 11, 5, 9
- mffprd 3, 0
- mffprd 6, 4
- lwz 12, 8(1)
- xxswapd 2, 35
- xxswapd 5, 37
- mffprd 4, 2
- xxswapd 1, 32
- xxswapd 6, 42
- mffprd 5, 1
- cmpld 6, 5, 3
- mffprd 7, 6
- xxswapd 3, 33
- xxswapd 7, 43
- mffprd 3, 3
- cmpld 5, 7, 6
- mffprd 6, 5
- mffprd 7, 7
- mfvsrd 5, 36
- cmpld 3, 4
- mfvsrd 3, 34
- cmpld 1, 7, 6
- mfvsrd 7, 32
- mfvsrd 4, 35
- mfvsrd 6, 37
- cmpld 7, 7, 3
- cmpd 2, 7, 3
- mfvsrd 3, 33
- crandc 21, 8, 30
- crand 22, 30, 24
- cmpld 6, 3, 4
- cmpd 7, 3, 4
- mfvsrd 4, 42
- sradi 3, 3, 63
- mtocrf 32, 12
- crnor 21, 22, 21
- crandc 23, 28, 26
- crand 24, 26, 0
- cmpld 4, 5
- cmpd 7, 4, 5
- mfvsrd 5, 43
- crnor 22, 24, 23
- mtfprd 5, 3
- sradi 4, 4, 63
- mtfprd 6, 4
- crandc 25, 28, 2
- crand 20, 2, 20
- cmpld 5, 6
- cmpd 7, 5, 6
- mfvsrd 6, 38
- sradi 5, 5, 63
- crnor 20, 20, 25
- mtfprd 7, 5
- sradi 6, 6, 63
- crandc 26, 28, 2
- crand 27, 2, 4
- crnor 23, 27, 26
- mtfprd 0, 6
- mfvsrd 6, 39
- sradi 6, 6, 63
- mtfprd 1, 6
- mfvsrd 6, 40
- sradi 6, 6, 63
- mtfprd 2, 6
- mfvsrd 6, 41
- sradi 6, 6, 63
- mtfprd 3, 6
- sradi 6, 7, 63
- mtfprd 4, 6
- li 6, -1
- isel 3, 0, 6, 21
- isel 4, 0, 6, 22
- isel 5, 0, 6, 20
- isel 6, 0, 6, 23
- mtfprd 8, 3
- addis 3, 2, .LCPI48_0 at toc@ha
- mtfprd 10, 4
- mtfprd 11, 5
- mtfprd 12, 6
- addi 3, 3, .LCPI48_0 at toc@l
- lxvd2x 9, 0, 3
- xxspltd 45, 6, 0
- xxspltd 46, 7, 0
- xxspltd 34, 0, 0
- xxspltd 40, 5, 0
- xxspltd 35, 1, 0
- xxspltd 36, 2, 0
- xxspltd 38, 3, 0
- xxspltd 39, 4, 0
- xxspltd 41, 8, 0
- xxspltd 44, 10, 0
- xxspltd 47, 11, 0
- xxspltd 48, 12, 0
- xxlxor 0, 34, 41
- xxlxor 1, 35, 44
- xxswapd 37, 9
- xxlxor 2, 39, 37
- xxlxor 3, 40, 37
- xxsel 34, 32, 2, 0
- xxsel 35, 33, 3, 1
- xxlxor 0, 36, 47
- xxlxor 1, 45, 37
- xxsel 36, 42, 1, 0
- xxlxor 0, 38, 48
- xxlxor 1, 46, 37
- xxsel 37, 43, 1, 0
- blr
- .long 0
- .quad 0
-.Lfunc_end48:
- .size sadd, .Lfunc_end48-.Lfunc_begin48
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i64_with_single_use # -- Begin function unsigned_sat_constant_i64_with_single_use
- .p2align 4
- .type unsigned_sat_constant_i64_with_single_use, at function
-unsigned_sat_constant_i64_with_single_use: # @unsigned_sat_constant_i64_with_single_use
-.Lfunc_begin49:
- .cfi_startproc
-# %bb.0:
- li 4, 4
- subc 3, 3, 4
- li 4, 0
- addze. 4, 4
- iseleq 3, 0, 3
- blr
- .long 0
- .quad 0
-.Lfunc_end49:
- .size unsigned_sat_constant_i64_with_single_use, .Lfunc_end49-.Lfunc_begin49
- .cfi_endproc
- # -- End function
- .globl unsigned_sat_constant_i64_with_multiple_use # -- Begin function unsigned_sat_constant_i64_with_multiple_use
- .p2align 4
- .type unsigned_sat_constant_i64_with_multiple_use, at function
-unsigned_sat_constant_i64_with_multiple_use: # @unsigned_sat_constant_i64_with_multiple_use
-.Lfunc_begin50:
- .cfi_startproc
-# %bb.0:
- cmpldi 3, 4
- li 5, 4
- isellt 5, 3, 5
- sub 3, 3, 5
- add 4, 4, 5
- mulld 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end50:
- .size unsigned_sat_constant_i64_with_multiple_use, .Lfunc_end50-.Lfunc_begin50
- .cfi_endproc
- # -- End function
- .section ".note.GNU-stack","", at progbits
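(Note for readers skimming the PowerPC diffs above: the scalar *_using_cmp_sum tests all spell unsigned saturating add as a plain add followed by an overflow compare and a select of all-ones. A minimal IR sketch of the equivalent intrinsic form is below; the function name and the constant 42 are illustrative, mirroring the tests rather than copied from them.)

define i32 @uadd_sat_sketch(i32 %x) {
  ; uadd.with.overflow returns the wrapped sum plus a carry-out bit;
  ; selecting all-ones on overflow yields the saturated result that
  ; the cmp_sum tests expect the backend to recognize.
  %s = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 42)
  %sum = extractvalue { i32, i1 } %s, 0
  %ov = extractvalue { i32, i1 } %s, 1
  %r = select i1 %ov, i32 -1, i32 %sum
  ret i32 %r
}
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)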
diff --git a/llvm/test/CodeGen/RISCV/branch-on-zero.ll b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
index 02aeebdeb3775..b1d396d70ff5f 100644
--- a/llvm/test/CodeGen/RISCV/branch-on-zero.ll
+++ b/llvm/test/CodeGen/RISCV/branch-on-zero.ll
@@ -8,19 +8,21 @@ define i32 @optbranch_32(i32 %Arg) {
; RV32-LABEL: optbranch_32:
; RV32: # %bb.0: # %bb
; RV32-NEXT: addi a0, a0, 1
-; RV32-NEXT: bnez a0, .LBB0_2
-; RV32-NEXT: # %bb.1: # %bb2
+; RV32-NEXT: beqz a0, .LBB0_2
+; RV32-NEXT: # %bb.1: # %bb3
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB0_2: # %bb2
; RV32-NEXT: li a0, -1
-; RV32-NEXT: .LBB0_2: # %bb3
; RV32-NEXT: ret
;
; RV64-LABEL: optbranch_32:
; RV64: # %bb.0: # %bb
; RV64-NEXT: addiw a0, a0, 1
-; RV64-NEXT: bnez a0, .LBB0_2
-; RV64-NEXT: # %bb.1: # %bb2
+; RV64-NEXT: beqz a0, .LBB0_2
+; RV64-NEXT: # %bb.1: # %bb3
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB0_2: # %bb2
; RV64-NEXT: li a0, -1
-; RV64-NEXT: .LBB0_2: # %bb3
; RV64-NEXT: ret
bb:
%i1 = icmp eq i32 %Arg, -1
@@ -41,20 +43,22 @@ define i64 @optbranch_64(i64 %Arg) {
; RV32-NEXT: seqz a2, a0
; RV32-NEXT: add a1, a1, a2
; RV32-NEXT: or a2, a0, a1
-; RV32-NEXT: bnez a2, .LBB1_2
-; RV32-NEXT: # %bb.1: # %bb2
+; RV32-NEXT: beqz a2, .LBB1_2
+; RV32-NEXT: # %bb.1: # %bb3
+; RV32-NEXT: ret
+; RV32-NEXT: .LBB1_2: # %bb2
; RV32-NEXT: li a0, -1
; RV32-NEXT: li a1, -1
-; RV32-NEXT: .LBB1_2: # %bb3
; RV32-NEXT: ret
;
; RV64-LABEL: optbranch_64:
; RV64: # %bb.0: # %bb
; RV64-NEXT: addi a0, a0, 1
-; RV64-NEXT: bnez a0, .LBB1_2
-; RV64-NEXT: # %bb.1: # %bb2
+; RV64-NEXT: beqz a0, .LBB1_2
+; RV64-NEXT: # %bb.1: # %bb3
+; RV64-NEXT: ret
+; RV64-NEXT: .LBB1_2: # %bb2
; RV64-NEXT: li a0, -1
-; RV64-NEXT: .LBB1_2: # %bb3
; RV64-NEXT: ret
bb:
%i1 = icmp eq i64 %Arg, -1
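(The psubus.ll changes below fold the manual sub/umin/cmpeq/and sequences into single unsigned saturating subtracts such as psubusw. A rough IR sketch of the per-lane pattern involved is below; the vector width and value names are illustrative and not taken from the test file.)

define <8 x i16> @usub_sat_sketch(<8 x i16> %x, <8 x i16> %y) {
  ; x - y does not wrap exactly when the difference is ule x, so
  ; selecting zero on wraparound gives the saturating subtract the
  ; backend lowers to psubusw in the checks below.
  %d = sub <8 x i16> %x, %y
  %nowrap = icmp ule <8 x i16> %d, %x
  %r = select <8 x i1> %nowrap, <8 x i16> %d, <8 x i16> zeroinitializer
  ret <8 x i16> %r
}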
diff --git a/llvm/test/CodeGen/X86/psubus.ll b/llvm/test/CodeGen/X86/psubus.ll
index 376bbb7018700..e10b360b35b56 100644
--- a/llvm/test/CodeGen/X86/psubus.ll
+++ b/llvm/test/CodeGen/X86/psubus.ll
@@ -254,60 +254,33 @@ vector.ph:
}
define <8 x i16> @test3(<8 x i16> %x, i16 zeroext %w) nounwind {
-; SSE2OR3-LABEL: test3:
-; SSE2OR3: # %bb.0: # %vector.ph
-; SSE2OR3-NEXT: movd %edi, %xmm1
-; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE2OR3-NEXT: pshufd {{.*#+}} xmm2 = xmm1[0,1,0,1]
-; SSE2OR3-NEXT: movdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE2OR3-NEXT: movdqa %xmm0, %xmm3
-; SSE2OR3-NEXT: pxor %xmm1, %xmm3
-; SSE2OR3-NEXT: psubw %xmm2, %xmm0
-; SSE2OR3-NEXT: pxor %xmm0, %xmm1
-; SSE2OR3-NEXT: pcmpgtw %xmm3, %xmm1
-; SSE2OR3-NEXT: pandn %xmm0, %xmm1
-; SSE2OR3-NEXT: movdqa %xmm1, %xmm0
-; SSE2OR3-NEXT: retq
-;
-; SSE41-LABEL: test3:
-; SSE41: # %bb.0: # %vector.ph
-; SSE41-NEXT: movd %edi, %xmm1
-; SSE41-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
-; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psubw %xmm1, %xmm2
-; SSE41-NEXT: pminuw %xmm2, %xmm0
-; SSE41-NEXT: pcmpeqw %xmm2, %xmm0
-; SSE41-NEXT: pand %xmm2, %xmm0
-; SSE41-NEXT: retq
+; SSE-LABEL: test3:
+; SSE: # %bb.0: # %vector.ph
+; SSE-NEXT: movd %edi, %xmm1
+; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
+; SSE-NEXT: psubusw %xmm1, %xmm0
+; SSE-NEXT: retq
;
; AVX1-LABEL: test3:
; AVX1: # %bb.0: # %vector.ph
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpminuw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpeqw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test3:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm1
; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1
-; AVX2-NEXT: vpsubw %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpminuw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpcmpeqw %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test3:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastw %edi, %xmm1
-; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm1
-; AVX512-NEXT: vpcmpleuw %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovdqu16 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <8 x i16> undef, i16 %w, i32 0
@@ -359,11 +332,7 @@ define <16 x i8> @test6(<16 x i8> %x, i8 zeroext %w) nounwind {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: psubb %xmm1, %xmm2
-; SSE2-NEXT: pminub %xmm2, %xmm0
-; SSE2-NEXT: pcmpeqb %xmm2, %xmm0
-; SSE2-NEXT: pand %xmm2, %xmm0
+; SSE2-NEXT: psubusb %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test6:
@@ -371,11 +340,7 @@ define <16 x i8> @test6(<16 x i8> %x, i8 zeroext %w) nounwind {
; SSSE3-NEXT: movd %edi, %xmm1
; SSSE3-NEXT: pxor %xmm2, %xmm2
; SSSE3-NEXT: pshufb %xmm2, %xmm1
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: psubb %xmm1, %xmm2
-; SSSE3-NEXT: pminub %xmm2, %xmm0
-; SSSE3-NEXT: pcmpeqb %xmm2, %xmm0
-; SSSE3-NEXT: pand %xmm2, %xmm0
+; SSSE3-NEXT: psubusb %xmm1, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test6:
@@ -383,11 +348,7 @@ define <16 x i8> @test6(<16 x i8> %x, i8 zeroext %w) nounwind {
; SSE41-NEXT: movd %edi, %xmm1
; SSE41-NEXT: pxor %xmm2, %xmm2
; SSE41-NEXT: pshufb %xmm2, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm2
-; SSE41-NEXT: psubb %xmm1, %xmm2
-; SSE41-NEXT: pminub %xmm2, %xmm0
-; SSE41-NEXT: pcmpeqb %xmm2, %xmm0
-; SSE41-NEXT: pand %xmm2, %xmm0
+; SSE41-NEXT: psubusb %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test6:
@@ -395,28 +356,20 @@ define <16 x i8> @test6(<16 x i8> %x, i8 zeroext %w) nounwind {
; AVX1-NEXT: vmovd %edi, %xmm1
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpminub %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test6:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm1
; AVX2-NEXT: vpbroadcastb %xmm1, %xmm1
-; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm1
-; AVX2-NEXT: vpminub %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test6:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastb %edi, %xmm1
-; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm1
-; AVX512-NEXT: vpcmpleub %xmm0, %xmm1, %k1
-; AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <16 x i8> undef, i8 %w, i32 0
@@ -589,45 +542,14 @@ vector.ph:
}
define <16 x i16> @test9(<16 x i16> %x, i16 zeroext %w) nounwind {
-; SSE2OR3-LABEL: test9:
-; SSE2OR3: # %bb.0: # %vector.ph
-; SSE2OR3-NEXT: movd %edi, %xmm2
-; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
-; SSE2OR3-NEXT: pshufd {{.*#+}} xmm4 = xmm2[0,1,0,1]
-; SSE2OR3-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE2OR3-NEXT: movdqa %xmm1, %xmm5
-; SSE2OR3-NEXT: pxor %xmm2, %xmm5
-; SSE2OR3-NEXT: psubw %xmm4, %xmm1
-; SSE2OR3-NEXT: movdqa %xmm1, %xmm3
-; SSE2OR3-NEXT: pxor %xmm2, %xmm3
-; SSE2OR3-NEXT: pcmpgtw %xmm5, %xmm3
-; SSE2OR3-NEXT: movdqa %xmm0, %xmm5
-; SSE2OR3-NEXT: pxor %xmm2, %xmm5
-; SSE2OR3-NEXT: psubw %xmm4, %xmm0
-; SSE2OR3-NEXT: pxor %xmm0, %xmm2
-; SSE2OR3-NEXT: pcmpgtw %xmm5, %xmm2
-; SSE2OR3-NEXT: pandn %xmm0, %xmm2
-; SSE2OR3-NEXT: pandn %xmm1, %xmm3
-; SSE2OR3-NEXT: movdqa %xmm2, %xmm0
-; SSE2OR3-NEXT: movdqa %xmm3, %xmm1
-; SSE2OR3-NEXT: retq
-;
-; SSE41-LABEL: test9:
-; SSE41: # %bb.0: # %vector.ph
-; SSE41-NEXT: movd %edi, %xmm2
-; SSE41-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
-; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
-; SSE41-NEXT: movdqa %xmm1, %xmm3
-; SSE41-NEXT: psubw %xmm2, %xmm3
-; SSE41-NEXT: pminuw %xmm3, %xmm1
-; SSE41-NEXT: pcmpeqw %xmm3, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm4
-; SSE41-NEXT: psubw %xmm2, %xmm4
-; SSE41-NEXT: pminuw %xmm4, %xmm0
-; SSE41-NEXT: pcmpeqw %xmm4, %xmm0
-; SSE41-NEXT: pand %xmm4, %xmm0
-; SSE41-NEXT: pand %xmm3, %xmm1
-; SSE41-NEXT: retq
+; SSE-LABEL: test9:
+; SSE: # %bb.0: # %vector.ph
+; SSE-NEXT: movd %edi, %xmm2
+; SSE-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
+; SSE-NEXT: psubusw %xmm2, %xmm0
+; SSE-NEXT: psubusw %xmm2, %xmm1
+; SSE-NEXT: retq
;
; AVX1-LABEL: test9:
; AVX1: # %bb.0: # %vector.ph
@@ -635,33 +557,22 @@ define <16 x i16> @test9(<16 x i16> %x, i16 zeroext %w) nounwind {
; AVX1-NEXT: vmovd %edi, %xmm2
; AVX1-NEXT: vpshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
-; AVX1-NEXT: vpsubw %xmm2, %xmm1, %xmm3
-; AVX1-NEXT: vpminuw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpcmpeqw %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpsubw %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpminuw %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpcmpeqw %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vpsubusw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubusw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm2, %ymm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test9:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm1
; AVX2-NEXT: vpbroadcastw %xmm1, %ymm1
-; AVX2-NEXT: vpsubw %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpminuw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpcmpeqw %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test9:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastw %edi, %ymm1
-; AVX512-NEXT: vpsubw %ymm1, %ymm0, %ymm1
-; AVX512-NEXT: vpcmpleuw %ymm0, %ymm1, %k1
-; AVX512-NEXT: vmovdqu16 %ymm1, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpsubusw %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <16 x i16> undef, i16 %w, i32 0
@@ -776,16 +687,8 @@ define <32 x i8> @test12(<32 x i8> %x, i8 zeroext %w) nounwind {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm2[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
-; SSE2-NEXT: movdqa %xmm1, %xmm3
-; SSE2-NEXT: psubb %xmm2, %xmm3
-; SSE2-NEXT: pminub %xmm3, %xmm1
-; SSE2-NEXT: pcmpeqb %xmm3, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: psubb %xmm2, %xmm4
-; SSE2-NEXT: pminub %xmm4, %xmm0
-; SSE2-NEXT: pcmpeqb %xmm4, %xmm0
-; SSE2-NEXT: pand %xmm4, %xmm0
-; SSE2-NEXT: pand %xmm3, %xmm1
+; SSE2-NEXT: psubusb %xmm2, %xmm0
+; SSE2-NEXT: psubusb %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test12:
@@ -793,16 +696,8 @@ define <32 x i8> @test12(<32 x i8> %x, i8 zeroext %w) nounwind {
; SSSE3-NEXT: movd %edi, %xmm2
; SSSE3-NEXT: pxor %xmm3, %xmm3
; SSSE3-NEXT: pshufb %xmm3, %xmm2
-; SSSE3-NEXT: movdqa %xmm1, %xmm3
-; SSSE3-NEXT: psubb %xmm2, %xmm3
-; SSSE3-NEXT: pminub %xmm3, %xmm1
-; SSSE3-NEXT: pcmpeqb %xmm3, %xmm1
-; SSSE3-NEXT: movdqa %xmm0, %xmm4
-; SSSE3-NEXT: psubb %xmm2, %xmm4
-; SSSE3-NEXT: pminub %xmm4, %xmm0
-; SSSE3-NEXT: pcmpeqb %xmm4, %xmm0
-; SSSE3-NEXT: pand %xmm4, %xmm0
-; SSSE3-NEXT: pand %xmm3, %xmm1
+; SSSE3-NEXT: psubusb %xmm2, %xmm0
+; SSSE3-NEXT: psubusb %xmm2, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test12:
@@ -810,16 +705,8 @@ define <32 x i8> @test12(<32 x i8> %x, i8 zeroext %w) nounwind {
; SSE41-NEXT: movd %edi, %xmm2
; SSE41-NEXT: pxor %xmm3, %xmm3
; SSE41-NEXT: pshufb %xmm3, %xmm2
-; SSE41-NEXT: movdqa %xmm1, %xmm3
-; SSE41-NEXT: psubb %xmm2, %xmm3
-; SSE41-NEXT: pminub %xmm3, %xmm1
-; SSE41-NEXT: pcmpeqb %xmm3, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm4
-; SSE41-NEXT: psubb %xmm2, %xmm4
-; SSE41-NEXT: pminub %xmm4, %xmm0
-; SSE41-NEXT: pcmpeqb %xmm4, %xmm0
-; SSE41-NEXT: pand %xmm4, %xmm0
-; SSE41-NEXT: pand %xmm3, %xmm1
+; SSE41-NEXT: psubusb %xmm2, %xmm0
+; SSE41-NEXT: psubusb %xmm2, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: test12:
@@ -828,33 +715,22 @@ define <32 x i8> @test12(<32 x i8> %x, i8 zeroext %w) nounwind {
; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsubb %xmm1, %xmm2, %xmm3
-; AVX1-NEXT: vpminub %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpcmpeqb %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpminub %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpeqb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpsubusb %xmm1, %xmm2, %xmm2
+; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
-; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: test12:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm1
; AVX2-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX2-NEXT: vpsubb %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpminub %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpcmpeqb %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX2-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: test12:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastb %edi, %ymm1
-; AVX512-NEXT: vpsubb %ymm1, %ymm0, %ymm1
-; AVX512-NEXT: vpcmpleub %ymm0, %ymm1, %k1
-; AVX512-NEXT: vmovdqu8 %ymm1, %ymm0 {%k1} {z}
+; AVX512-NEXT: vpsubusb %ymm1, %ymm0, %ymm0
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <32 x i8> undef, i8 %w, i32 0
@@ -868,122 +744,87 @@ vector.ph:
define <8 x i16> @test13(<8 x i16> %x, <8 x i32> %y) nounwind {
; SSE2-LABEL: test13:
; SSE2: # %bb.0: # %vector.ph
-; SSE2-NEXT: pxor %xmm3, %xmm3
-; SSE2-NEXT: movdqa %xmm0, %xmm4
-; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2-NEXT: movdqa %xmm0, %xmm5
-; SSE2-NEXT: por %xmm3, %xmm5
-; SSE2-NEXT: psubd %xmm2, %xmm0
-; SSE2-NEXT: movdqa %xmm0, %xmm2
-; SSE2-NEXT: pxor %xmm3, %xmm2
-; SSE2-NEXT: pcmpgtd %xmm5, %xmm2
-; SSE2-NEXT: movdqa %xmm4, %xmm5
-; SSE2-NEXT: por %xmm3, %xmm5
-; SSE2-NEXT: psubd %xmm1, %xmm4
-; SSE2-NEXT: pxor %xmm4, %xmm3
-; SSE2-NEXT: pcmpgtd %xmm5, %xmm3
-; SSE2-NEXT: packssdw %xmm2, %xmm3
-; SSE2-NEXT: pslld $16, %xmm0
-; SSE2-NEXT: psrad $16, %xmm0
-; SSE2-NEXT: pslld $16, %xmm4
-; SSE2-NEXT: psrad $16, %xmm4
-; SSE2-NEXT: packssdw %xmm0, %xmm4
-; SSE2-NEXT: pandn %xmm4, %xmm3
-; SSE2-NEXT: movdqa %xmm3, %xmm0
+; SSE2-NEXT: movdqa %xmm2, %xmm4
+; SSE2-NEXT: pxor %xmm3, %xmm4
+; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
+; SSE2-NEXT: movdqa %xmm5, %xmm6
+; SSE2-NEXT: pcmpgtd %xmm4, %xmm6
+; SSE2-NEXT: pcmpeqd %xmm4, %xmm4
+; SSE2-NEXT: pand %xmm6, %xmm2
+; SSE2-NEXT: pxor %xmm4, %xmm6
+; SSE2-NEXT: por %xmm2, %xmm6
+; SSE2-NEXT: pslld $16, %xmm6
+; SSE2-NEXT: psrad $16, %xmm6
+; SSE2-NEXT: pxor %xmm1, %xmm3
+; SSE2-NEXT: pcmpgtd %xmm3, %xmm5
+; SSE2-NEXT: pxor %xmm5, %xmm4
+; SSE2-NEXT: pand %xmm1, %xmm5
+; SSE2-NEXT: por %xmm4, %xmm5
+; SSE2-NEXT: pslld $16, %xmm5
+; SSE2-NEXT: psrad $16, %xmm5
+; SSE2-NEXT: packssdw %xmm6, %xmm5
+; SSE2-NEXT: psubusw %xmm5, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test13:
; SSSE3: # %bb.0: # %vector.ph
-; SSSE3-NEXT: pxor %xmm3, %xmm3
-; SSSE3-NEXT: movdqa %xmm0, %xmm4
-; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
-; SSSE3-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [2147483648,2147483648,2147483648,2147483648]
-; SSSE3-NEXT: movdqa %xmm0, %xmm5
-; SSSE3-NEXT: por %xmm3, %xmm5
-; SSSE3-NEXT: psubd %xmm2, %xmm0
-; SSSE3-NEXT: movdqa %xmm0, %xmm2
-; SSSE3-NEXT: pxor %xmm3, %xmm2
-; SSSE3-NEXT: pcmpgtd %xmm5, %xmm2
-; SSSE3-NEXT: movdqa %xmm4, %xmm5
-; SSSE3-NEXT: por %xmm3, %xmm5
-; SSSE3-NEXT: psubd %xmm1, %xmm4
-; SSSE3-NEXT: pxor %xmm4, %xmm3
-; SSSE3-NEXT: pcmpgtd %xmm5, %xmm3
-; SSSE3-NEXT: packssdw %xmm2, %xmm3
-; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; SSSE3-NEXT: pshufb %xmm1, %xmm0
-; SSSE3-NEXT: pshufb %xmm1, %xmm4
-; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm4 = xmm4[0],xmm0[0]
-; SSSE3-NEXT: pandn %xmm4, %xmm3
-; SSSE3-NEXT: movdqa %xmm3, %xmm0
+; SSSE3-NEXT: movdqa %xmm2, %xmm4
+; SSSE3-NEXT: pxor %xmm3, %xmm4
+; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2147549183,2147549183,2147549183,2147549183]
+; SSSE3-NEXT: movdqa %xmm5, %xmm6
+; SSSE3-NEXT: pcmpgtd %xmm4, %xmm6
+; SSSE3-NEXT: pcmpeqd %xmm4, %xmm4
+; SSSE3-NEXT: pand %xmm6, %xmm2
+; SSSE3-NEXT: pxor %xmm4, %xmm6
+; SSSE3-NEXT: por %xmm2, %xmm6
+; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; SSSE3-NEXT: pshufb %xmm2, %xmm6
+; SSSE3-NEXT: pxor %xmm1, %xmm3
+; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5
+; SSSE3-NEXT: pxor %xmm5, %xmm4
+; SSSE3-NEXT: pand %xmm1, %xmm5
+; SSSE3-NEXT: por %xmm4, %xmm5
+; SSSE3-NEXT: pshufb %xmm2, %xmm5
+; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm6[0]
+; SSSE3-NEXT: psubusw %xmm5, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test13:
; SSE41: # %bb.0: # %vector.ph
-; SSE41-NEXT: pxor %xmm3, %xmm3
-; SSE41-NEXT: pmovzxwd {{.*#+}} xmm4 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
-; SSE41-NEXT: movdqa %xmm0, %xmm5
-; SSE41-NEXT: psubd %xmm2, %xmm5
-; SSE41-NEXT: pminud %xmm5, %xmm0
-; SSE41-NEXT: pcmpeqd %xmm5, %xmm0
-; SSE41-NEXT: movdqa %xmm4, %xmm2
-; SSE41-NEXT: psubd %xmm1, %xmm2
-; SSE41-NEXT: pminud %xmm2, %xmm4
-; SSE41-NEXT: pcmpeqd %xmm2, %xmm4
-; SSE41-NEXT: packssdw %xmm0, %xmm4
-; SSE41-NEXT: pblendw {{.*#+}} xmm5 = xmm5[0],xmm3[1],xmm5[2],xmm3[3],xmm5[4],xmm3[5],xmm5[6],xmm3[7]
-; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1],xmm2[2],xmm3[3],xmm2[4],xmm3[5],xmm2[6],xmm3[7]
-; SSE41-NEXT: packusdw %xmm5, %xmm2
-; SSE41-NEXT: pand %xmm4, %xmm2
-; SSE41-NEXT: movdqa %xmm2, %xmm0
+; SSE41-NEXT: pmovsxbw {{.*#+}} xmm3 = [65535,0,65535,0,65535,0,65535,0]
+; SSE41-NEXT: pminud %xmm3, %xmm2
+; SSE41-NEXT: pminud %xmm3, %xmm1
+; SSE41-NEXT: packusdw %xmm2, %xmm1
+; SSE41-NEXT: psubusw %xmm1, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test13:
; AVX1: # %bb.0: # %vector.ph
-; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm3 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
-; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
-; AVX1-NEXT: vpsubd %xmm4, %xmm3, %xmm4
-; AVX1-NEXT: vpminud %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpcmpeqd %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpminud %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: vpackssdw %xmm3, %xmm0, %xmm0
-; AVX1-NEXT: vpblendw {{.*#+}} xmm3 = xmm4[0],xmm2[1],xmm4[2],xmm2[3],xmm4[4],xmm2[5],xmm4[6],xmm2[7]
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
-; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vbroadcastss {{.*#+}} xmm3 = [65535,65535,65535,65535]
+; AVX1-NEXT: vpminud %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpminud %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: test13:
; AVX2: # %bb.0: # %vector.ph
-; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX2-NEXT: vpsubd %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpminud %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpcmpeqd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; AVX2-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackssdw %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
-; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
-; AVX2-NEXT: vpandn %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535]
+; AVX2-NEXT: vpminud %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpackusdw %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test13:
; AVX512: # %bb.0: # %vector.ph
-; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm1
-; AVX512-NEXT: vpcmpleud %ymm0, %ymm1, %k1
-; AVX512-NEXT: vpmovdw %ymm1, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpmovusdw %ymm1, %xmm1
+; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
vector.ph:
@@ -999,92 +840,80 @@ vector.ph:
define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; SSE2OR3-LABEL: test14:
; SSE2OR3: # %bb.0: # %vector.ph
+; SSE2OR3-NEXT: pxor %xmm6, %xmm6
; SSE2OR3-NEXT: movdqa %xmm0, %xmm5
-; SSE2OR3-NEXT: pxor %xmm0, %xmm0
-; SSE2OR3-NEXT: movdqa %xmm5, %xmm7
-; SSE2OR3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm0[0],xmm7[1],xmm0[1],xmm7[2],xmm0[2],xmm7[3],xmm0[3],xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
-; SSE2OR3-NEXT: movdqa %xmm7, %xmm6
-; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm0[0],xmm6[1],xmm0[1],xmm6[2],xmm0[2],xmm6[3],xmm0[3]
-; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm7 = xmm7[4],xmm0[4],xmm7[5],xmm0[5],xmm7[6],xmm0[6],xmm7[7],xmm0[7]
-; SSE2OR3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
-; SSE2OR3-NEXT: movdqa %xmm5, %xmm8
-; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm8 = xmm8[0],xmm0[0],xmm8[1],xmm0[1],xmm8[2],xmm0[2],xmm8[3],xmm0[3]
-; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
-; SSE2OR3-NEXT: movdqa {{.*#+}} xmm0 = [2147483648,2147483648,2147483648,2147483648]
-; SSE2OR3-NEXT: movdqa %xmm4, %xmm9
-; SSE2OR3-NEXT: pxor %xmm0, %xmm9
-; SSE2OR3-NEXT: psubd %xmm5, %xmm4
-; SSE2OR3-NEXT: movdqa %xmm4, %xmm5
-; SSE2OR3-NEXT: pxor %xmm0, %xmm5
-; SSE2OR3-NEXT: pcmpgtd %xmm9, %xmm5
-; SSE2OR3-NEXT: movdqa %xmm3, %xmm9
-; SSE2OR3-NEXT: pxor %xmm0, %xmm9
-; SSE2OR3-NEXT: psubd %xmm8, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm4, %xmm7
; SSE2OR3-NEXT: movdqa %xmm3, %xmm8
-; SSE2OR3-NEXT: pxor %xmm0, %xmm8
-; SSE2OR3-NEXT: pcmpgtd %xmm9, %xmm8
-; SSE2OR3-NEXT: packssdw %xmm5, %xmm8
-; SSE2OR3-NEXT: movdqa %xmm2, %xmm5
-; SSE2OR3-NEXT: pxor %xmm0, %xmm5
-; SSE2OR3-NEXT: psubd %xmm7, %xmm2
-; SSE2OR3-NEXT: movdqa %xmm2, %xmm7
-; SSE2OR3-NEXT: pxor %xmm0, %xmm7
-; SSE2OR3-NEXT: pcmpgtd %xmm5, %xmm7
-; SSE2OR3-NEXT: movdqa %xmm1, %xmm5
-; SSE2OR3-NEXT: pxor %xmm0, %xmm5
-; SSE2OR3-NEXT: psubd %xmm6, %xmm1
-; SSE2OR3-NEXT: pxor %xmm1, %xmm0
-; SSE2OR3-NEXT: pcmpgtd %xmm5, %xmm0
-; SSE2OR3-NEXT: packssdw %xmm7, %xmm0
-; SSE2OR3-NEXT: packsswb %xmm8, %xmm0
-; SSE2OR3-NEXT: movdqa {{.*#+}} xmm5 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; SSE2OR3-NEXT: pand %xmm5, %xmm4
-; SSE2OR3-NEXT: pand %xmm5, %xmm3
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm9
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm10 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; SSE2OR3-NEXT: pand %xmm10, %xmm4
+; SSE2OR3-NEXT: pand %xmm10, %xmm3
; SSE2OR3-NEXT: packuswb %xmm4, %xmm3
-; SSE2OR3-NEXT: pand %xmm5, %xmm2
-; SSE2OR3-NEXT: pand %xmm5, %xmm1
+; SSE2OR3-NEXT: movdqa %xmm1, %xmm4
+; SSE2OR3-NEXT: pand %xmm10, %xmm2
+; SSE2OR3-NEXT: pand %xmm10, %xmm1
; SSE2OR3-NEXT: packuswb %xmm2, %xmm1
; SSE2OR3-NEXT: packuswb %xmm3, %xmm1
+; SSE2OR3-NEXT: psubb %xmm0, %xmm1
+; SSE2OR3-NEXT: movdqa %xmm0, %xmm2
+; SSE2OR3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm6[0],xmm2[1],xmm6[1],xmm2[2],xmm6[2],xmm2[3],xmm6[3],xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
+; SSE2OR3-NEXT: movdqa %xmm2, %xmm0
+; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3]
+; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm6[4],xmm2[5],xmm6[5],xmm2[6],xmm6[6],xmm2[7],xmm6[7]
+; SSE2OR3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm6[8],xmm5[9],xmm6[9],xmm5[10],xmm6[10],xmm5[11],xmm6[11],xmm5[12],xmm6[12],xmm5[13],xmm6[13],xmm5[14],xmm6[14],xmm5[15],xmm6[15]
+; SSE2OR3-NEXT: movdqa %xmm5, %xmm3
+; SSE2OR3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm6[0],xmm3[1],xmm6[1],xmm3[2],xmm6[2],xmm3[3],xmm6[3]
+; SSE2OR3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm6[4],xmm5[5],xmm6[5],xmm5[6],xmm6[6],xmm5[7],xmm6[7]
+; SSE2OR3-NEXT: movdqa {{.*#+}} xmm6 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2OR3-NEXT: pxor %xmm6, %xmm7
+; SSE2OR3-NEXT: por %xmm6, %xmm5
+; SSE2OR3-NEXT: pcmpgtd %xmm7, %xmm5
+; SSE2OR3-NEXT: pxor %xmm6, %xmm8
+; SSE2OR3-NEXT: por %xmm6, %xmm3
+; SSE2OR3-NEXT: pcmpgtd %xmm8, %xmm3
+; SSE2OR3-NEXT: packssdw %xmm5, %xmm3
+; SSE2OR3-NEXT: pxor %xmm6, %xmm9
+; SSE2OR3-NEXT: por %xmm6, %xmm2
+; SSE2OR3-NEXT: pcmpgtd %xmm9, %xmm2
+; SSE2OR3-NEXT: pxor %xmm6, %xmm4
+; SSE2OR3-NEXT: por %xmm6, %xmm0
+; SSE2OR3-NEXT: pcmpgtd %xmm4, %xmm0
+; SSE2OR3-NEXT: packssdw %xmm2, %xmm0
+; SSE2OR3-NEXT: packsswb %xmm3, %xmm0
; SSE2OR3-NEXT: pandn %xmm1, %xmm0
; SSE2OR3-NEXT: retq
;
; SSE41-LABEL: test14:
; SSE41: # %bb.0: # %vector.ph
; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[1,1,1,1]
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm9 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; SSE41-NEXT: movdqa %xmm4, %xmm6
-; SSE41-NEXT: psubd %xmm0, %xmm6
-; SSE41-NEXT: pminud %xmm6, %xmm4
-; SSE41-NEXT: pcmpeqd %xmm6, %xmm4
-; SSE41-NEXT: movdqa %xmm3, %xmm5
-; SSE41-NEXT: psubd %xmm9, %xmm5
-; SSE41-NEXT: pminud %xmm5, %xmm3
-; SSE41-NEXT: pcmpeqd %xmm5, %xmm3
-; SSE41-NEXT: packssdw %xmm4, %xmm3
-; SSE41-NEXT: movdqa %xmm1, %xmm0
-; SSE41-NEXT: psubd %xmm8, %xmm0
-; SSE41-NEXT: pminud %xmm0, %xmm1
-; SSE41-NEXT: pcmpeqd %xmm0, %xmm1
-; SSE41-NEXT: movdqa %xmm2, %xmm4
-; SSE41-NEXT: psubd %xmm7, %xmm4
-; SSE41-NEXT: pminud %xmm4, %xmm2
-; SSE41-NEXT: pcmpeqd %xmm4, %xmm2
-; SSE41-NEXT: packssdw %xmm2, %xmm1
-; SSE41-NEXT: packsswb %xmm3, %xmm1
-; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255]
-; SSE41-NEXT: pand %xmm2, %xmm6
-; SSE41-NEXT: pand %xmm2, %xmm5
-; SSE41-NEXT: packusdw %xmm6, %xmm5
-; SSE41-NEXT: pand %xmm2, %xmm0
-; SSE41-NEXT: pand %xmm2, %xmm4
-; SSE41-NEXT: packusdw %xmm4, %xmm0
-; SSE41-NEXT: packuswb %xmm5, %xmm0
-; SSE41-NEXT: pand %xmm1, %xmm0
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm6 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm7 = xmm0[2,3,2,3]
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm7 = xmm7[0],zero,zero,zero,xmm7[1],zero,zero,zero,xmm7[2],zero,zero,zero,xmm7[3],zero,zero,zero
+; SSE41-NEXT: pshufd {{.*#+}} xmm8 = xmm0[3,3,3,3]
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm8 = xmm8[0],zero,zero,zero,xmm8[1],zero,zero,zero,xmm8[2],zero,zero,zero,xmm8[3],zero,zero,zero
+; SSE41-NEXT: pmaxud %xmm4, %xmm8
+; SSE41-NEXT: pcmpeqd %xmm4, %xmm8
+; SSE41-NEXT: pmaxud %xmm3, %xmm7
+; SSE41-NEXT: pcmpeqd %xmm3, %xmm7
+; SSE41-NEXT: packssdw %xmm8, %xmm7
+; SSE41-NEXT: pmaxud %xmm1, %xmm6
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm6
+; SSE41-NEXT: pmaxud %xmm2, %xmm5
+; SSE41-NEXT: pcmpeqd %xmm2, %xmm5
+; SSE41-NEXT: packssdw %xmm5, %xmm6
+; SSE41-NEXT: packsswb %xmm7, %xmm6
+; SSE41-NEXT: pmovzxbd {{.*#+}} xmm5 = [255,255,255,255]
+; SSE41-NEXT: pand %xmm5, %xmm4
+; SSE41-NEXT: pand %xmm5, %xmm3
+; SSE41-NEXT: packusdw %xmm4, %xmm3
+; SSE41-NEXT: pand %xmm5, %xmm2
+; SSE41-NEXT: pand %xmm1, %xmm5
+; SSE41-NEXT: packusdw %xmm2, %xmm5
+; SSE41-NEXT: packuswb %xmm3, %xmm5
+; SSE41-NEXT: psubb %xmm0, %xmm5
+; SSE41-NEXT: pand %xmm6, %xmm5
+; SSE41-NEXT: movdqa %xmm5, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: test14:
@@ -1094,34 +923,31 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT: vpshufd {{.*#+}} xmm5 = xmm0[2,3,2,3]
; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,xmm5[1],zero,zero,zero,xmm5[2],zero,zero,zero,xmm5[3],zero,zero,zero
-; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
-; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm6
-; AVX1-NEXT: vpsubd %xmm0, %xmm6, %xmm0
-; AVX1-NEXT: vpminud %xmm6, %xmm0, %xmm6
-; AVX1-NEXT: vpcmpeqd %xmm6, %xmm0, %xmm6
-; AVX1-NEXT: vpsubd %xmm5, %xmm2, %xmm5
-; AVX1-NEXT: vpminud %xmm2, %xmm5, %xmm2
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm5, %xmm2
-; AVX1-NEXT: vpackssdw %xmm6, %xmm2, %xmm2
-; AVX1-NEXT: vpsubd %xmm4, %xmm1, %xmm4
-; AVX1-NEXT: vpminud %xmm1, %xmm4, %xmm6
-; AVX1-NEXT: vpcmpeqd %xmm6, %xmm4, %xmm6
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpsubd %xmm3, %xmm1, %xmm3
-; AVX1-NEXT: vpminud %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm3, %xmm1
-; AVX1-NEXT: vpackssdw %xmm1, %xmm6, %xmm1
-; AVX1-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255]
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm5, %xmm5
-; AVX1-NEXT: vpackusdw %xmm0, %xmm5, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm4, %xmm4
-; AVX1-NEXT: vpand %xmm2, %xmm3, %xmm2
-; AVX1-NEXT: vpackusdw %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpackuswb %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpand %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpshufd {{.*#+}} xmm6 = xmm0[3,3,3,3]
+; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm6 = xmm6[0],zero,zero,zero,xmm6[1],zero,zero,zero,xmm6[2],zero,zero,zero,xmm6[3],zero,zero,zero
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm7
+; AVX1-NEXT: vpmaxud %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpcmpeqd %xmm6, %xmm7, %xmm6
+; AVX1-NEXT: vpmaxud %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpcmpeqd %xmm5, %xmm2, %xmm5
+; AVX1-NEXT: vpackssdw %xmm6, %xmm5, %xmm5
+; AVX1-NEXT: vpmaxud %xmm4, %xmm1, %xmm4
+; AVX1-NEXT: vpcmpeqd %xmm4, %xmm1, %xmm4
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm6
+; AVX1-NEXT: vpmaxud %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpcmpeqd %xmm3, %xmm6, %xmm3
+; AVX1-NEXT: vpackssdw %xmm3, %xmm4, %xmm3
+; AVX1-NEXT: vpacksswb %xmm5, %xmm3, %xmm3
+; AVX1-NEXT: vbroadcastss {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255]
+; AVX1-NEXT: vandps %ymm4, %ymm2, %ymm2
+; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm5
+; AVX1-NEXT: vpackusdw %xmm5, %xmm2, %xmm2
+; AVX1-NEXT: vandps %ymm4, %ymm1, %ymm1
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm4
+; AVX1-NEXT: vpackusdw %xmm4, %xmm1, %xmm1
+; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: vpand %xmm0, %xmm3, %xmm0
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
@@ -1129,38 +955,35 @@ define <16 x i8> @test14(<16 x i8> %x, <16 x i32> %y) nounwind {
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm0[2,3,2,3]
; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero,xmm3[2],zero,zero,zero,xmm3[3],zero,zero,zero,xmm3[4],zero,zero,zero,xmm3[5],zero,zero,zero,xmm3[6],zero,zero,zero,xmm3[7],zero,zero,zero
-; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
-; AVX2-NEXT: vpsubd %ymm0, %ymm1, %ymm0
-; AVX2-NEXT: vpminud %ymm1, %ymm0, %ymm1
-; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm1
+; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; AVX2-NEXT: vpmaxud %ymm4, %ymm1, %ymm4
+; AVX2-NEXT: vpcmpeqd %ymm4, %ymm1, %ymm4
+; AVX2-NEXT: vpmaxud %ymm3, %ymm2, %ymm3
+; AVX2-NEXT: vpcmpeqd %ymm3, %ymm2, %ymm3
+; AVX2-NEXT: vpackssdw %ymm3, %ymm4, %ymm3
; AVX2-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX2-NEXT: vpxor %ymm4, %ymm1, %ymm1
-; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm5
-; AVX2-NEXT: vpackssdw %xmm5, %xmm1, %xmm1
-; AVX2-NEXT: vpsubd %ymm3, %ymm2, %ymm3
-; AVX2-NEXT: vpminud %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpcmpeqd %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpxor %ymm4, %ymm2, %ymm2
-; AVX2-NEXT: vextracti128 $1, %ymm2, %xmm4
-; AVX2-NEXT: vpackssdw %xmm4, %xmm2, %xmm2
-; AVX2-NEXT: vpacksswb %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
-; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm2, %ymm3, %ymm2
-; AVX2-NEXT: vpackusdw %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX2-NEXT: vpackuswb %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
-; AVX2-NEXT: vpandn %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpxor %ymm4, %ymm3, %ymm3
+; AVX2-NEXT: vextracti128 $1, %ymm3, %xmm4
+; AVX2-NEXT: vpacksswb %xmm4, %xmm3, %xmm3
+; AVX2-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,2,1,3]
+; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0]
+; AVX2-NEXT: vpand %ymm4, %ymm2, %ymm2
+; AVX2-NEXT: vpand %ymm4, %ymm1, %ymm1
+; AVX2-NEXT: vpackusdw %ymm2, %ymm1, %ymm1
+; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm2
+; AVX2-NEXT: vpackuswb %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; AVX2-NEXT: vpsubb %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: vpandn %xmm0, %xmm3, %xmm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
; AVX512-LABEL: test14:
; AVX512: # %bb.0: # %vector.ph
-; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
-; AVX512-NEXT: vpsubd %zmm0, %zmm1, %zmm0
-; AVX512-NEXT: vpcmpleud %zmm1, %zmm0, %k1
-; AVX512-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
+; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
+; AVX512-NEXT: vpcmpnltud %zmm2, %zmm1, %k1
+; AVX512-NEXT: vpmovdb %zmm1, %xmm1
+; AVX512-NEXT: vpsubb %xmm0, %xmm1, %xmm0 {%k1} {z}
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
vector.ph:
@@ -1398,26 +1221,10 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
-; SSE2-NEXT: movdqa %xmm3, %xmm5
-; SSE2-NEXT: psubb %xmm4, %xmm5
-; SSE2-NEXT: pminub %xmm5, %xmm3
-; SSE2-NEXT: pcmpeqb %xmm5, %xmm3
-; SSE2-NEXT: movdqa %xmm2, %xmm6
-; SSE2-NEXT: psubb %xmm4, %xmm6
-; SSE2-NEXT: pminub %xmm6, %xmm2
-; SSE2-NEXT: pcmpeqb %xmm6, %xmm2
-; SSE2-NEXT: movdqa %xmm1, %xmm7
-; SSE2-NEXT: psubb %xmm4, %xmm7
-; SSE2-NEXT: pminub %xmm7, %xmm1
-; SSE2-NEXT: pcmpeqb %xmm7, %xmm1
-; SSE2-NEXT: movdqa %xmm0, %xmm8
-; SSE2-NEXT: psubb %xmm4, %xmm8
-; SSE2-NEXT: pminub %xmm8, %xmm0
-; SSE2-NEXT: pcmpeqb %xmm8, %xmm0
-; SSE2-NEXT: pand %xmm8, %xmm0
-; SSE2-NEXT: pand %xmm7, %xmm1
-; SSE2-NEXT: pand %xmm6, %xmm2
-; SSE2-NEXT: pand %xmm5, %xmm3
+; SSE2-NEXT: psubusb %xmm4, %xmm0
+; SSE2-NEXT: psubusb %xmm4, %xmm1
+; SSE2-NEXT: psubusb %xmm4, %xmm2
+; SSE2-NEXT: psubusb %xmm4, %xmm3
; SSE2-NEXT: retq
;
; SSSE3-LABEL: test17:
@@ -1425,26 +1232,10 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
; SSSE3-NEXT: movd %edi, %xmm4
; SSSE3-NEXT: pxor %xmm5, %xmm5
; SSSE3-NEXT: pshufb %xmm5, %xmm4
-; SSSE3-NEXT: movdqa %xmm3, %xmm5
-; SSSE3-NEXT: psubb %xmm4, %xmm5
-; SSSE3-NEXT: pminub %xmm5, %xmm3
-; SSSE3-NEXT: pcmpeqb %xmm5, %xmm3
-; SSSE3-NEXT: movdqa %xmm2, %xmm6
-; SSSE3-NEXT: psubb %xmm4, %xmm6
-; SSSE3-NEXT: pminub %xmm6, %xmm2
-; SSSE3-NEXT: pcmpeqb %xmm6, %xmm2
-; SSSE3-NEXT: movdqa %xmm1, %xmm7
-; SSSE3-NEXT: psubb %xmm4, %xmm7
-; SSSE3-NEXT: pminub %xmm7, %xmm1
-; SSSE3-NEXT: pcmpeqb %xmm7, %xmm1
-; SSSE3-NEXT: movdqa %xmm0, %xmm8
-; SSSE3-NEXT: psubb %xmm4, %xmm8
-; SSSE3-NEXT: pminub %xmm8, %xmm0
-; SSSE3-NEXT: pcmpeqb %xmm8, %xmm0
-; SSSE3-NEXT: pand %xmm8, %xmm0
-; SSSE3-NEXT: pand %xmm7, %xmm1
-; SSSE3-NEXT: pand %xmm6, %xmm2
-; SSSE3-NEXT: pand %xmm5, %xmm3
+; SSSE3-NEXT: psubusb %xmm4, %xmm0
+; SSSE3-NEXT: psubusb %xmm4, %xmm1
+; SSSE3-NEXT: psubusb %xmm4, %xmm2
+; SSSE3-NEXT: psubusb %xmm4, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: test17:
@@ -1452,26 +1243,10 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
; SSE41-NEXT: movd %edi, %xmm4
; SSE41-NEXT: pxor %xmm5, %xmm5
; SSE41-NEXT: pshufb %xmm5, %xmm4
-; SSE41-NEXT: movdqa %xmm3, %xmm5
-; SSE41-NEXT: psubb %xmm4, %xmm5
-; SSE41-NEXT: pminub %xmm5, %xmm3
-; SSE41-NEXT: pcmpeqb %xmm5, %xmm3
-; SSE41-NEXT: movdqa %xmm2, %xmm6
-; SSE41-NEXT: psubb %xmm4, %xmm6
-; SSE41-NEXT: pminub %xmm6, %xmm2
-; SSE41-NEXT: pcmpeqb %xmm6, %xmm2
-; SSE41-NEXT: movdqa %xmm1, %xmm7
-; SSE41-NEXT: psubb %xmm4, %xmm7
-; SSE41-NEXT: pminub %xmm7, %xmm1
-; SSE41-NEXT: pcmpeqb %xmm7, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm8
-; SSE41-NEXT: psubb %xmm4, %xmm8
-; SSE41-NEXT: pminub %xmm8, %xmm0
-; SSE41-NEXT: pcmpeqb %xmm8, %xmm0
-; SSE41-NEXT: pand %xmm8, %xmm0
-; SSE41-NEXT: pand %xmm7, %xmm1
-; SSE41-NEXT: pand %xmm6, %xmm2
-; SSE41-NEXT: pand %xmm5, %xmm3
+; SSE41-NEXT: psubusb %xmm4, %xmm0
+; SSE41-NEXT: psubusb %xmm4, %xmm1
+; SSE41-NEXT: psubusb %xmm4, %xmm2
+; SSE41-NEXT: psubusb %xmm4, %xmm3
; SSE41-NEXT: retq
;
; AVX1-LABEL: test17:
@@ -1479,48 +1254,28 @@ define <64 x i8> @test17(<64 x i8> %x, i8 zeroext %w) nounwind {
; AVX1-NEXT: vmovd %edi, %xmm2
; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT: vpshufb %xmm3, %xmm2, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
-; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm4
-; AVX1-NEXT: vpminub %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpcmpeqb %xmm3, %xmm4, %xmm3
-; AVX1-NEXT: vpsubb %xmm2, %xmm1, %xmm5
-; AVX1-NEXT: vpminub %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpcmpeqb %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3
-; AVX1-NEXT: vpsubb %xmm2, %xmm3, %xmm6
-; AVX1-NEXT: vpminub %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vpcmpeqb %xmm3, %xmm6, %xmm3
-; AVX1-NEXT: vpsubb %xmm2, %xmm0, %xmm2
-; AVX1-NEXT: vpminub %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vpcmpeqb %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vpsubusb %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsubusb %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm3
-; AVX1-NEXT: vandps %ymm3, %ymm1, %ymm1
-; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm2, %ymm2
-; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
+; AVX1-NEXT: vpsubusb %xmm2, %xmm3, %xmm3
+; AVX1-NEXT: vpsubusb %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test17:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastb %xmm2, %ymm2
-; AVX2-NEXT: vpsubb %ymm2, %ymm1, %ymm3
-; AVX2-NEXT: vpminub %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpcmpeqb %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpsubb %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpminub %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpsubusb %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubusb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test17:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastb %edi, %zmm1
-; AVX512-NEXT: vpsubb %zmm1, %zmm0, %zmm1
-; AVX512-NEXT: vpcmpleub %zmm0, %zmm1, %k1
-; AVX512-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpsubusb %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <64 x i8> undef, i8 %w, i32 0
@@ -1532,119 +1287,44 @@ vector.ph:
}
define <32 x i16> @test18(<32 x i16> %x, i16 zeroext %w) nounwind {
-; SSE2OR3-LABEL: test18:
-; SSE2OR3: # %bb.0: # %vector.ph
-; SSE2OR3-NEXT: movdqa %xmm0, %xmm5
-; SSE2OR3-NEXT: movd %edi, %xmm0
-; SSE2OR3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
-; SSE2OR3-NEXT: pshufd {{.*#+}} xmm8 = xmm0[0,1,0,1]
-; SSE2OR3-NEXT: movdqa {{.*#+}} xmm0 = [32768,32768,32768,32768,32768,32768,32768,32768]
-; SSE2OR3-NEXT: movdqa %xmm3, %xmm6
-; SSE2OR3-NEXT: pxor %xmm0, %xmm6
-; SSE2OR3-NEXT: psubw %xmm8, %xmm3
-; SSE2OR3-NEXT: movdqa %xmm3, %xmm4
-; SSE2OR3-NEXT: pxor %xmm0, %xmm4
-; SSE2OR3-NEXT: pcmpgtw %xmm6, %xmm4
-; SSE2OR3-NEXT: movdqa %xmm2, %xmm7
-; SSE2OR3-NEXT: pxor %xmm0, %xmm7
-; SSE2OR3-NEXT: psubw %xmm8, %xmm2
-; SSE2OR3-NEXT: movdqa %xmm2, %xmm6
-; SSE2OR3-NEXT: pxor %xmm0, %xmm6
-; SSE2OR3-NEXT: pcmpgtw %xmm7, %xmm6
-; SSE2OR3-NEXT: movdqa %xmm1, %xmm9
-; SSE2OR3-NEXT: pxor %xmm0, %xmm9
-; SSE2OR3-NEXT: psubw %xmm8, %xmm1
-; SSE2OR3-NEXT: movdqa %xmm1, %xmm7
-; SSE2OR3-NEXT: pxor %xmm0, %xmm7
-; SSE2OR3-NEXT: pcmpgtw %xmm9, %xmm7
-; SSE2OR3-NEXT: movdqa %xmm5, %xmm9
-; SSE2OR3-NEXT: pxor %xmm0, %xmm9
-; SSE2OR3-NEXT: psubw %xmm8, %xmm5
-; SSE2OR3-NEXT: pxor %xmm5, %xmm0
-; SSE2OR3-NEXT: pcmpgtw %xmm9, %xmm0
-; SSE2OR3-NEXT: pandn %xmm5, %xmm0
-; SSE2OR3-NEXT: pandn %xmm1, %xmm7
-; SSE2OR3-NEXT: pandn %xmm2, %xmm6
-; SSE2OR3-NEXT: pandn %xmm3, %xmm4
-; SSE2OR3-NEXT: movdqa %xmm7, %xmm1
-; SSE2OR3-NEXT: movdqa %xmm6, %xmm2
-; SSE2OR3-NEXT: movdqa %xmm4, %xmm3
-; SSE2OR3-NEXT: retq
-;
-; SSE41-LABEL: test18:
-; SSE41: # %bb.0: # %vector.ph
-; SSE41-NEXT: movd %edi, %xmm4
-; SSE41-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,0,0,4,5,6,7]
-; SSE41-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
-; SSE41-NEXT: movdqa %xmm3, %xmm5
-; SSE41-NEXT: psubw %xmm4, %xmm5
-; SSE41-NEXT: pminuw %xmm5, %xmm3
-; SSE41-NEXT: pcmpeqw %xmm5, %xmm3
-; SSE41-NEXT: movdqa %xmm2, %xmm6
-; SSE41-NEXT: psubw %xmm4, %xmm6
-; SSE41-NEXT: pminuw %xmm6, %xmm2
-; SSE41-NEXT: pcmpeqw %xmm6, %xmm2
-; SSE41-NEXT: movdqa %xmm1, %xmm7
-; SSE41-NEXT: psubw %xmm4, %xmm7
-; SSE41-NEXT: pminuw %xmm7, %xmm1
-; SSE41-NEXT: pcmpeqw %xmm7, %xmm1
-; SSE41-NEXT: movdqa %xmm0, %xmm8
-; SSE41-NEXT: psubw %xmm4, %xmm8
-; SSE41-NEXT: pminuw %xmm8, %xmm0
-; SSE41-NEXT: pcmpeqw %xmm8, %xmm0
-; SSE41-NEXT: pand %xmm8, %xmm0
-; SSE41-NEXT: pand %xmm7, %xmm1
-; SSE41-NEXT: pand %xmm6, %xmm2
-; SSE41-NEXT: pand %xmm5, %xmm3
-; SSE41-NEXT: retq
+; SSE-LABEL: test18:
+; SSE: # %bb.0: # %vector.ph
+; SSE-NEXT: movd %edi, %xmm4
+; SSE-NEXT: pshuflw {{.*#+}} xmm4 = xmm4[0,0,0,0,4,5,6,7]
+; SSE-NEXT: pshufd {{.*#+}} xmm4 = xmm4[0,1,0,1]
+; SSE-NEXT: psubusw %xmm4, %xmm0
+; SSE-NEXT: psubusw %xmm4, %xmm1
+; SSE-NEXT: psubusw %xmm4, %xmm2
+; SSE-NEXT: psubusw %xmm4, %xmm3
+; SSE-NEXT: retq
;
; AVX1-LABEL: test18:
; AVX1: # %bb.0: # %vector.ph
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT: vmovd %edi, %xmm3
; AVX1-NEXT: vpshuflw {{.*#+}} xmm3 = xmm3[0,0,0,0,4,5,6,7]
; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[0,1,0,1]
-; AVX1-NEXT: vpsubw %xmm3, %xmm2, %xmm4
-; AVX1-NEXT: vpminuw %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm4, %xmm2
-; AVX1-NEXT: vpsubw %xmm3, %xmm1, %xmm5
-; AVX1-NEXT: vpminuw %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vpcmpeqw %xmm1, %xmm5, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
-; AVX1-NEXT: vpsubw %xmm3, %xmm2, %xmm6
-; AVX1-NEXT: vpminuw %xmm2, %xmm6, %xmm2
-; AVX1-NEXT: vpcmpeqw %xmm2, %xmm6, %xmm2
-; AVX1-NEXT: vpsubw %xmm3, %xmm0, %xmm3
-; AVX1-NEXT: vpminuw %xmm0, %xmm3, %xmm0
-; AVX1-NEXT: vpcmpeqw %xmm0, %xmm3, %xmm0
+; AVX1-NEXT: vpsubusw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsubusw %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
-; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm5, %ymm2
-; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm3, %ymm2
-; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0
+; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2
+; AVX1-NEXT: vpsubusw %xmm3, %xmm2, %xmm2
+; AVX1-NEXT: vpsubusw %xmm3, %xmm1, %xmm1
+; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: test18:
; AVX2: # %bb.0: # %vector.ph
; AVX2-NEXT: vmovd %edi, %xmm2
; AVX2-NEXT: vpbroadcastw %xmm2, %ymm2
-; AVX2-NEXT: vpsubw %ymm2, %ymm1, %ymm3
-; AVX2-NEXT: vpminuw %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; AVX2-NEXT: vpsubw %ymm2, %ymm0, %ymm2
-; AVX2-NEXT: vpminuw %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vpcmpeqw %ymm0, %ymm2, %ymm0
-; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX2-NEXT: vpsubusw %ymm2, %ymm0, %ymm0
+; AVX2-NEXT: vpsubusw %ymm2, %ymm1, %ymm1
; AVX2-NEXT: retq
;
; AVX512-LABEL: test18:
; AVX512: # %bb.0: # %vector.ph
; AVX512-NEXT: vpbroadcastw %edi, %zmm1
-; AVX512-NEXT: vpsubw %zmm1, %zmm0, %zmm1
-; AVX512-NEXT: vpcmpleuw %zmm0, %zmm1, %k1
-; AVX512-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z}
+; AVX512-NEXT: vpsubusw %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
vector.ph:
%0 = insertelement <32 x i16> undef, i16 %w, i32 0
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
index 4e31b48ec5cec..1b307b30d8c0d 100644
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -2065,10 +2065,11 @@ define i64 @PR51612(i64 %x, i64 %y) {
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %eax
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %ecx
; ATHLON-NEXT: movl {{[0-9]+}}(%esp), %edx
-; ATHLON-NEXT: incl %edx
-; ATHLON-NEXT: addl $1, %eax
-; ATHLON-NEXT: adcl $0, %ecx
-; ATHLON-NEXT: cmovbl %edx, %eax
+; ATHLON-NEXT: addl $1, %ecx
+; ATHLON-NEXT: adcl $0, %edx
+; ATHLON-NEXT: incl %eax
+; ATHLON-NEXT: orl %ecx, %edx
+; ATHLON-NEXT: cmovnel %ecx, %eax
; ATHLON-NEXT: andl 10, %eax
; ATHLON-NEXT: xorl %edx, %edx
; ATHLON-NEXT: retl
@@ -2077,7 +2078,8 @@ define i64 @PR51612(i64 %x, i64 %y) {
; MCU: # %bb.0:
; MCU-NEXT: addl $1, %eax
; MCU-NEXT: adcl $0, %edx
-; MCU-NEXT: jae .LBB45_2
+; MCU-NEXT: orl %eax, %edx
+; MCU-NEXT: jne .LBB45_2
; MCU-NEXT: # %bb.1:
; MCU-NEXT: movl {{[0-9]+}}(%esp), %eax
; MCU-NEXT: incl %eax
diff --git a/test_direct_uaddo.ll b/test_direct_uaddo.ll
deleted file mode 100644
index a923d212bbf90..0000000000000
--- a/test_direct_uaddo.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-define i32 @test_direct_uaddo(i32 %x, i32 %y) {
- %result = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
- %ovf = extractvalue {i32, i1} %result, 1
- %val = extractvalue {i32, i1} %result, 0
- %sel = select i1 %ovf, i32 -1, i32 %val
- ret i32 %sel
-}
-
-declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
diff --git a/test_sat_pattern.ll b/test_sat_pattern.ll
deleted file mode 100644
index 150c8081a77ac..0000000000000
--- a/test_sat_pattern.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-define <8 x i16> @test_sat_pattern(<8 x i16> %x, <8 x i16> %y) {
- %a = add <8 x i16> %x, %y
- %c = icmp ugt <8 x i16> %x, %a
- %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
- ret <8 x i16> %r
-}
diff --git a/test_sat_pattern.s b/test_sat_pattern.s
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/test_scalar_sat.ll b/test_scalar_sat.ll
deleted file mode 100644
index 6ef9729e66a75..0000000000000
--- a/test_scalar_sat.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-define i8 @test_scalar_sat(i8 %x) {
- %a = add i8 %x, 42
- %c = icmp ugt i8 %x, %a
- %r = select i1 %c, i8 -1, i8 %a
- ret i8 %r
-}
diff --git a/test_uaddo_conversion.ll b/test_uaddo_conversion.ll
deleted file mode 100644
index ca433863997b7..0000000000000
--- a/test_uaddo_conversion.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-define i32 @test_uaddo_to_uaddsat(i32 %x, i32 %y) {
- %result = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
- %val = extractvalue {i32, i1} %result, 0
- %ovf = extractvalue {i32, i1} %result, 1
- %sel = select i1 %ovf, i32 -1, i32 %val
- ret i32 %sel
-}
-
-declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
diff --git a/test_uaddo_only.ll b/test_uaddo_only.ll
deleted file mode 100644
index 4f7056148fa99..0000000000000
--- a/test_uaddo_only.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-define i32 @test_uaddo_only(i32 %x, i32 %y) {
- %add = add i32 %x, %y
- %cmp = icmp ugt i32 %x, %add
- %sel = select i1 %cmp, i32 -1, i32 %add
- ret i32 %sel
-}
diff --git a/test_uaddo_only.s b/test_uaddo_only.s
deleted file mode 100644
index e04ea329bd8e9..0000000000000
--- a/test_uaddo_only.s
+++ /dev/null
@@ -1,22 +0,0 @@
- .abiversion 2
- .file "test_uaddo_only.ll"
- .text
- .globl test_uaddo_only # -- Begin function test_uaddo_only
- .p2align 4
- .type test_uaddo_only,@function
-test_uaddo_only: # @test_uaddo_only
-.Lfunc_begin0:
- .cfi_startproc
-# %bb.0:
- add 4, 3, 4
- cmplw 4, 3
- li 3, -1
- isellt 3, 3, 4
- blr
- .long 0
- .quad 0
-.Lfunc_end0:
- .size test_uaddo_only, .Lfunc_end0-.Lfunc_begin0
- .cfi_endproc
- # -- End function
- .section ".note.GNU-stack","",@progbits
diff --git a/test_uaddsat.ll b/test_uaddsat.ll
deleted file mode 100644
index 0c5423504fb48..0000000000000
--- a/test_uaddsat.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; Test file to verify uaddo -> uaddsat conversion
-define i32 @test_uaddsat_pattern(i32 %x, i32 %y) {
- %add = add i32 %x, %y
- %cmp = icmp ugt i32 %x, %add
- %sel = select i1 %cmp, i32 -1, i32 %add
- ret i32 %sel
-}
-
-declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
diff --git a/test_usubo.ll b/test_usubo.ll
deleted file mode 100644
index e588f43f3cec9..0000000000000
--- a/test_usubo.ll
+++ /dev/null
@@ -1,15 +0,0 @@
-; Test file to verify usubo -> usubsat conversion
-define i32 @test_usubo_to_usubsat(i32 %x, i32 %y) {
- %result = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %x, i32 %y)
- %val = extractvalue {i32, i1} %result, 0
- ret i32 %val
-}
-
-define i32 @test_uaddo_to_uaddsat(i32 %x, i32 %y) {
- %result = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
- %val = extractvalue {i32, i1} %result, 0
- ret i32 %val
-}
-
-declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32)
-declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32)
diff --git a/test_vector_uaddo.ll b/test_vector_uaddo.ll
deleted file mode 100644
index 8105ed0041f54..0000000000000
--- a/test_vector_uaddo.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-define <8 x i16> @test_vector_uaddo(<8 x i16> %x, <8 x i16> %y) {
- %result = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> %x, <8 x i16> %y)
- %ovf = extractvalue { <8 x i16>, <8 x i1> } %result, 1
- %val = extractvalue { <8 x i16>, <8 x i1> } %result, 0
- %sel = select <8 x i1> %ovf, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %val
- ret <8 x i16> %sel
-}
-
-declare { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16>, <8 x i16>)
diff --git a/test_vector_uaddo.s b/test_vector_uaddo.s
deleted file mode 100644
index 5834fc58aa562..0000000000000
--- a/test_vector_uaddo.s
+++ /dev/null
@@ -1,21 +0,0 @@
- .abiversion 2
- .file "test_vector_uaddo.ll"
- .text
- .globl test_vector_uaddo # -- Begin function test_vector_uaddo
- .p2align 4
- .type test_vector_uaddo,@function
-test_vector_uaddo: # @test_vector_uaddo
-.Lfunc_begin0:
- .cfi_startproc
-# %bb.0:
- vadduhm 3, 2, 3
- vcmpgtuh 2, 2, 3
- xxlor 34, 34, 35
- blr
- .long 0
- .quad 0
-.Lfunc_end0:
- .size test_vector_uaddo, .Lfunc_end0-.Lfunc_begin0
- .cfi_endproc
- # -- End function
- .section ".note.GNU-stack","",@progbits
diff --git a/trace_uaddsat.ll b/trace_uaddsat.ll
deleted file mode 100644
index 8fccd2816d67f..0000000000000
--- a/trace_uaddsat.ll
+++ /dev/null
@@ -1,6 +0,0 @@
-define i32 @test_uaddsat(i32 %x, i32 %y) {
- %add = add i32 %x, %y
- %cmp = icmp ugt i32 %x, %add
- %sel = select i1 %cmp, i32 -1, i32 %add
- ret i32 %sel
-}
>From 39d54e73823be300229a3d628bbca276c5602f34 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 24 Sep 2025 12:42:34 -0400
Subject: [PATCH 07/12] Update TargetLowering.h
---
llvm/include/llvm/CodeGen/TargetLowering.h | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index c874fc9a36e1c..37406832dbfb1 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3460,6 +3460,8 @@ class LLVM_ABI TargetLoweringBase {
// besides the overflow check. On some targets (e.g. SPARC), it is
// not profitable to form on overflow op if the math result has no
// concrete users.
+ if (VT.isVector())
+ return false;
return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
}
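For context, here is a minimal sketch of the kind of pattern this vector bail-out is meant to preserve. The IR below is illustrative only (it is adapted from the saturation tests elsewhere in this series, not part of the patch): by declining to form an overflow op for vector types, CodeGenPrepare leaves the add/icmp/select form intact, so the DAG-level saturation matching exercised by the X86 and PowerPC test updates above can still fire.

; Illustrative only: with the vector guard in place, CodeGenPrepare keeps this
; as add + icmp + select instead of rewriting it to @llvm.uadd.with.overflow,
; which is what lets it later become a saturating add on targets that have one.
define <8 x i16> @sketch_vector_sat_add(<8 x i16> %x, <8 x i16> %y) {
  %a = add <8 x i16> %x, %y
  %c = icmp ugt <8 x i16> %x, %a
  %r = select <8 x i1> %c, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %a
  ret <8 x i16> %r
}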
>From 0c40916a5a6f4e4709b26b3ebb545f1e0391e833 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 24 Sep 2025 12:45:50 -0400
Subject: [PATCH 08/12] Update DAGCombiner.cpp
---
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 25 -------------------
1 file changed, 25 deletions(-)
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index db2fc895cf09f..a6ba6e518899f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -13347,31 +13347,6 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
DAG.getNode(ISD::AND, DL, N0.getValueType(), N1.getOperand(1), N0));
}
- // vselect uaddo(x, y).overflow, -1, uaddo(x, y) -> uaddsat(x, y)
- // This converts the pattern created by CodeGenPrepare back to uaddsat
- // Handle the case where overflow might be sign-extended
- if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG) {
- // Look through sign_extend_inreg to find the actual overflow flag
- (void)N0.getOperand(0);
- if ((N1.getOpcode() == ISD::UADDO && N1.getResNo() == 0 &&
- ISD::isConstantSplatVectorAllOnes(N2.getNode())) ||
- (N2.getOpcode() == ISD::UADDO && N2.getResNo() == 0 &&
- ISD::isConstantSplatVectorAllOnes(N1.getNode()))) {
- LLVM_DEBUG(dbgs() << "Converting uaddo to uaddsat\n");
- return DAG.getNode(ISD::UADDSAT, DL, VT,
- N1.getOpcode() == ISD::UADDO ? N1.getOperand(0) : N2.getOperand(0),
- N1.getOpcode() == ISD::UADDO ? N1.getOperand(1) : N2.getOperand(1));
- }
- } else if ((N1.getOpcode() == ISD::UADDO && N1.getResNo() == 0 &&
- ISD::isConstantSplatVectorAllOnes(N2.getNode())) ||
- (N2.getOpcode() == ISD::UADDO && N2.getResNo() == 0 &&
- ISD::isConstantSplatVectorAllOnes(N1.getNode()))) {
- LLVM_DEBUG(dbgs() << "Converting uaddo to uaddsat\n");
- return DAG.getNode(ISD::UADDSAT, DL, VT,
- N1.getOpcode() == ISD::UADDO ? N1.getOperand(0) : N2.getOperand(0),
- N1.getOpcode() == ISD::UADDO ? N1.getOperand(1) : N2.getOperand(1));
- }
-
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
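For readers following the series, the combine deleted here encoded the equivalence sketched below, shown at the IR level with illustrative function names (the combine itself matched ISD::VSELECT over ISD::UADDO results). With CodeGenPrepare no longer forming vector uaddo, the DAG pattern it looked for is not expected to appear, so the special case can be dropped.

; Illustrative IR-level restatement of the deleted DAG combine: selecting -1 on
; uadd.with.overflow's overflow bit is the same as an unsigned saturating add.
define <8 x i16> @sketch_before(<8 x i16> %x, <8 x i16> %y) {
  %r = call { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16> %x, <8 x i16> %y)
  %v = extractvalue { <8 x i16>, <8 x i1> } %r, 0
  %o = extractvalue { <8 x i16>, <8 x i1> } %r, 1
  %s = select <8 x i1> %o, <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>, <8 x i16> %v
  ret <8 x i16> %s
}

define <8 x i16> @sketch_after(<8 x i16> %x, <8 x i16> %y) {
  %s = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %x, <8 x i16> %y)
  ret <8 x i16> %s
}

declare { <8 x i16>, <8 x i1> } @llvm.uadd.with.overflow.v8i16(<8 x i16>, <8 x i16>)
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>)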
>From c0a371141983d176b208a7133a4b0faec1c9f94f Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 24 Sep 2025 12:58:19 -0400
Subject: [PATCH 09/12] n
---
llvm/test/CodeGen/PowerPC/sat-add.ll | 52 +++++++++++++---------------
1 file changed, 24 insertions(+), 28 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll
index 771c2ca0a866c..34b703a981105 100644
--- a/llvm/test/CodeGen/PowerPC/sat-add.ll
+++ b/llvm/test/CodeGen/PowerPC/sat-add.ll
@@ -24,12 +24,11 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) {
define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) {
; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 4, 3, 24
+; CHECK-NEXT: clrlwi 3, 3, 24
; CHECK-NEXT: addi 3, 3, 42
-; CHECK-NEXT: clrlwi 5, 3, 24
-; CHECK-NEXT: cmplw 4, 5
+; CHECK-NEXT: andi. 4, 3, 256
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: iseleq 3, 3, 4
; CHECK-NEXT: blr
%a = add i8 %x, 42
%c = icmp ugt i8 %x, %a
@@ -70,12 +69,11 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) {
; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 4, 3, 16
+; CHECK-NEXT: clrlwi 3, 3, 16
; CHECK-NEXT: addi 3, 3, 42
-; CHECK-NEXT: clrlwi 5, 3, 16
-; CHECK-NEXT: cmplw 4, 5
+; CHECK-NEXT: andis. 4, 3, 1
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: iseleq 3, 3, 4
; CHECK-NEXT: blr
%a = add i16 %x, 42
%c = icmp ugt i16 %x, %a
@@ -116,9 +114,9 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) {
; CHECK-LABEL: unsigned_sat_constant_i32_using_cmp_sum:
; CHECK: # %bb.0:
; CHECK-NEXT: addi 4, 3, 42
-; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: cmplw 4, 3
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: iselgt 3, 3, 4
+; CHECK-NEXT: isellt 3, 3, 4
; CHECK-NEXT: blr
%a = add i32 %x, 42
%c = icmp ugt i32 %x, %a
@@ -205,12 +203,12 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) {
define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) {
; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 5, 3, 24
+; CHECK-NEXT: clrlwi 4, 4, 24
+; CHECK-NEXT: clrlwi 3, 3, 24
; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: clrlwi 4, 3, 24
-; CHECK-NEXT: cmplw 5, 4
+; CHECK-NEXT: andi. 4, 3, 256
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: iseleq 3, 3, 4
; CHECK-NEXT: blr
%a = add i8 %x, %y
%c = icmp ugt i8 %x, %a
@@ -256,12 +254,12 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) {
; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: clrlwi 5, 3, 16
+; CHECK-NEXT: clrlwi 4, 4, 16
+; CHECK-NEXT: clrlwi 3, 3, 16
; CHECK-NEXT: add 3, 3, 4
-; CHECK-NEXT: clrlwi 4, 3, 16
-; CHECK-NEXT: cmplw 5, 4
+; CHECK-NEXT: andis. 4, 3, 1
; CHECK-NEXT: li 4, -1
-; CHECK-NEXT: iselgt 3, 4, 3
+; CHECK-NEXT: iseleq 3, 3, 4
; CHECK-NEXT: blr
%a = add i16 %x, %y
%c = icmp ugt i16 %x, %a
@@ -306,9 +304,9 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) {
; CHECK-LABEL: unsigned_sat_variable_i32_using_cmp_sum:
; CHECK: # %bb.0:
; CHECK-NEXT: add 4, 3, 4
-; CHECK-NEXT: cmplw 3, 4
+; CHECK-NEXT: cmplw 4, 3
; CHECK-NEXT: li 3, -1
-; CHECK-NEXT: iselgt 3, 3, 4
+; CHECK-NEXT: isellt 3, 3, 4
; CHECK-NEXT: blr
%a = add i32 %x, %y
%c = icmp ugt i32 %x, %a
@@ -540,11 +538,9 @@ define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
; CHECK-NEXT: addis 3, 2, .LCPI34_0@toc@ha
; CHECK-NEXT: addi 3, 3, .LCPI34_0@toc@l
; CHECK-NEXT: lxvd2x 35, 0, 3
-; CHECK-NEXT: addis 3, 2, .LCPI34_1@toc@ha
-; CHECK-NEXT: addi 3, 3, .LCPI34_1@toc@l
-; CHECK-NEXT: lxvd2x 36, 0, 3
-; CHECK-NEXT: vminud 2, 2, 4
-; CHECK-NEXT: vaddudm 2, 2, 3
+; CHECK-NEXT: vaddudm 3, 2, 3
+; CHECK-NEXT: vcmpgtud 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: blr
%a = add <2 x i64> %x, <i64 42, i64 42>
%c = icmp ugt <2 x i64> %x, %a
@@ -708,9 +704,9 @@ define <2 x i64> @unsigned_sat_variable_v2i64_using_min(<2 x i64> %x, <2 x i64>
define <2 x i64> @unsigned_sat_variable_v2i64_using_cmp_sum(<2 x i64> %x, <2 x i64> %y) {
; CHECK-LABEL: unsigned_sat_variable_v2i64_using_cmp_sum:
; CHECK: # %bb.0:
-; CHECK-NEXT: xxlnor 36, 35, 35
-; CHECK-NEXT: vminud 2, 2, 4
-; CHECK-NEXT: vaddudm 2, 2, 3
+; CHECK-NEXT: vaddudm 3, 2, 3
+; CHECK-NEXT: vcmpgtud 2, 2, 3
+; CHECK-NEXT: xxlor 34, 34, 35
; CHECK-NEXT: blr
%a = add <2 x i64> %x, %y
%c = icmp ugt <2 x i64> %x, %a
>From cf4f237d1e6f07ad3197d03ecf6fa3e8869ef07a Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Wed, 24 Sep 2025 14:39:56 -0400
Subject: [PATCH 10/12] n
---
llvm/include/llvm/CodeGen/TargetLowering.h | 2 --
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++++
2 files changed, 4 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 37406832dbfb1..c874fc9a36e1c 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3460,8 +3460,6 @@ class LLVM_ABI TargetLoweringBase {
// besides the overflow check. On some targets (e.g. SPARC), it is
// not profitable to form on overflow op if the math result has no
// concrete users.
- if (VT.isVector())
- return false;
return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a237635b4541a..0aa540321b150 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3406,6 +3406,10 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool) const {
+ // TODO: Allow vectors?
+ if (VT.isVector())
+ return false;
+
return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
}
>From 9d9cd9d618eedabffd99968a7e64245f31e88ebe Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Thu, 25 Sep 2025 09:11:16 -0400
Subject: [PATCH 11/12] h
---
llvm/include/llvm/CodeGen/TargetLowering.h | 2 ++
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 -----
2 files changed, 2 insertions(+), 5 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index c874fc9a36e1c..37406832dbfb1 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3460,6 +3460,8 @@ class LLVM_ABI TargetLoweringBase {
// besides the overflow check. On some targets (e.g. SPARC), it is
// not profitable to form on overflow op if the math result has no
// concrete users.
+ if (VT.isVector())
+ return false;
return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0aa540321b150..e24de87738a8a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3405,11 +3405,6 @@ bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
bool) const {
-
- // TODO: Allow vectors?
- if (VT.isVector())
- return false;
-
return TargetLowering::shouldFormOverflowOp(Opcode, VT, true);
}
>From e7a1fef510c5a6d5deecc97f412004b62b3d0196 Mon Sep 17 00:00:00 2001
From: AZero13 <gfunni234 at gmail.com>
Date: Thu, 25 Sep 2025 09:21:21 -0400
Subject: [PATCH 12/12] h
---
llvm/include/llvm/CodeGen/TargetLowering.h | 3 ++-
llvm/lib/Target/RISCV/RISCVISelLowering.h | 8 --------
2 files changed, 2 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 37406832dbfb1..291ef5ef9ba83 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3462,7 +3462,8 @@ class LLVM_ABI TargetLoweringBase {
// concrete users.
if (VT.isVector())
return false;
- return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
+ return MathUsed &&
+ (isTypeLegal(VT) || isOperationLegalOrCustomOrPromote(Opcode, VT));
}
// Return true if it is profitable to use a scalar input to a BUILD_VECTOR
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3f81ed74c12ed..54f12e76a4845 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -201,14 +201,6 @@ class RISCVTargetLowering : public TargetLowering {
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
EVT VT) const override;
- bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
- bool MathUsed) const override {
- if (VT == MVT::i8 || VT == MVT::i16)
- return false;
-
- return TargetLowering::shouldFormOverflowOp(Opcode, VT, MathUsed);
- }
-
bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
unsigned AddrSpace) const override {
// If we can replace 4 or more scalar stores, there will be a reduction